numbl 0.1.6 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +53 -2
- package/dist-cli/cli.js +38743 -24679
- package/dist-lib/lib.js +43424 -30466
- package/dist-lib/numbl-core/executeCode.d.ts +22 -0
- package/dist-lib/numbl-core/helpers/bessel.d.ts +9 -0
- package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
- package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
- package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
- package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
- package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +100 -5
- package/dist-lib/numbl-core/interpreter/interpreter.d.ts +41 -3
- package/dist-lib/numbl-core/interpreter/interpreterSpecialBuiltins.d.ts +2 -0
- package/dist-lib/numbl-core/interpreter/types.d.ts +16 -7
- package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
- package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
- package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
- package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
- package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
- package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
- package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
- package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
- package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
- package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
- package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
- package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
- package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
- package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
- package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
- package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
- package/dist-lib/numbl-core/jit/c/hybrid.d.ts +42 -0
- package/dist-lib/numbl-core/jit/c/install.d.ts +15 -0
- package/dist-lib/numbl-core/jit/c/parityError.d.ts +26 -0
- package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
- package/dist-lib/numbl-core/jit/c/registry.d.ts +51 -0
- package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
- package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
- package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
- package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
- package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
- package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
- package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +61 -0
- package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
- package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
- package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
- package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +13 -1
- package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
- package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
- package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
- package/dist-lib/numbl-core/jit/jitLowerTypes.d.ts +29 -0
- package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
- package/dist-lib/numbl-core/jit/jitTypes.d.ts +394 -0
- package/dist-lib/numbl-core/jit/js/jitCodegen.d.ts +7 -0
- package/dist-lib/numbl-core/jit/js/jitCodegenHoist.d.ts +70 -0
- package/dist-lib/numbl-core/jit/js/jitHelpers.d.ts +34 -0
- package/dist-lib/numbl-core/jit/js/jitHelpersComplex.d.ts +21 -0
- package/dist-lib/numbl-core/jit/js/jitHelpersIndex.d.ts +33 -0
- package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
- package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
- package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
- package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
- package/dist-lib/numbl-core/native/lapack-bridge.d.ts +46 -1
- package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
- package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
- package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
- package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
- package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
- package/dist-lib/numbl-core/ops/index.d.ts +8 -0
- package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
- package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
- package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
- package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
- package/dist-lib/numbl-core/parser/types.d.ts +6 -0
- package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
- package/dist-lib/numbl-core/runtime/constructors.d.ts +2 -1
- package/dist-lib/numbl-core/runtime/error.d.ts +3 -0
- package/dist-lib/numbl-core/runtime/index.d.ts +1 -1
- package/dist-lib/numbl-core/runtime/runtime.d.ts +15 -2
- package/dist-lib/numbl-core/runtime/runtimePlot.d.ts +11 -0
- package/dist-lib/numbl-core/runtime/types.d.ts +16 -1
- package/dist-lib/numbl-core/runtime/utils.d.ts +3 -1
- package/dist-lib/numbl-core/version.d.ts +1 -1
- package/dist-plot-viewer/assets/{index-vtrJ8bml.js → index-GiUNnMQg.js} +1 -1
- package/dist-plot-viewer/index.html +1 -1
- package/native/elemwise.cpp +134 -0
- package/native/jit_runtime/jit_runtime.c +261 -0
- package/native/jit_runtime/jit_runtime.h +204 -0
- package/native/numbl_addon.cpp +55 -1
- package/native/numbl_addon_common.h +1 -0
- package/native/ops/bessel.c +572 -0
- package/native/ops/comparison.c +150 -0
- package/native/ops/complex_binary_elemwise.c +192 -0
- package/native/ops/complex_unary_elemwise.c +152 -0
- package/native/ops/numbl_ops.c +66 -0
- package/native/ops/numbl_ops.h +262 -0
- package/native/ops/real_binary_elemwise.c +85 -0
- package/native/ops/real_unary_elemwise.c +104 -0
- package/native/ops/reduce.c +162 -0
- package/native/ops_napi.cpp +320 -0
- package/package.json +11 -10
- package/dist-lib/numbl-core/interpreter/jit/jitCodegen.d.ts +0 -5
- package/dist-lib/numbl-core/interpreter/jit/jitHelpers.d.ts +0 -14
- package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -20
- package/dist-lib/numbl-core/interpreter/jit/jitTypes.d.ts +0 -168
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Real + complex comparison ops. Output is a logical tensor stored as
|
|
3
|
+
* 0.0 / 1.0 in a double buffer (numbl stores logicals as FloatXArray with
|
|
4
|
+
* an _isLogical flag on the runtime tensor).
|
|
5
|
+
*
|
|
6
|
+
* Caller-allocated input/output buffers; never copies.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include "numbl_ops.h"
|
|
10
|
+
|
|
11
|
+
static inline double rcmp(int op, double a, double b) {
|
|
12
|
+
switch (op) {
|
|
13
|
+
case NUMBL_CMP_EQ: return a == b ? 1.0 : 0.0;
|
|
14
|
+
case NUMBL_CMP_NE: return a != b ? 1.0 : 0.0;
|
|
15
|
+
case NUMBL_CMP_LT: return a < b ? 1.0 : 0.0;
|
|
16
|
+
case NUMBL_CMP_LE: return a <= b ? 1.0 : 0.0;
|
|
17
|
+
case NUMBL_CMP_GT: return a > b ? 1.0 : 0.0;
|
|
18
|
+
case NUMBL_CMP_GE: return a >= b ? 1.0 : 0.0;
|
|
19
|
+
default: return -1.0; /* sentinel; caller checks bad op */
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
int numbl_real_comparison(int op, size_t n,
|
|
24
|
+
const double* a, const double* b,
|
|
25
|
+
double* out) {
|
|
26
|
+
if (!a || !b || !out) return NUMBL_ERR_NULL_PTR;
|
|
27
|
+
if (op < 0 || op > NUMBL_CMP_GE) return NUMBL_ERR_BAD_OP;
|
|
28
|
+
switch (op) {
|
|
29
|
+
case NUMBL_CMP_EQ:
|
|
30
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] == b[i] ? 1.0 : 0.0;
|
|
31
|
+
return NUMBL_OK;
|
|
32
|
+
case NUMBL_CMP_NE:
|
|
33
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] != b[i] ? 1.0 : 0.0;
|
|
34
|
+
return NUMBL_OK;
|
|
35
|
+
case NUMBL_CMP_LT:
|
|
36
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] < b[i] ? 1.0 : 0.0;
|
|
37
|
+
return NUMBL_OK;
|
|
38
|
+
case NUMBL_CMP_LE:
|
|
39
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] <= b[i] ? 1.0 : 0.0;
|
|
40
|
+
return NUMBL_OK;
|
|
41
|
+
case NUMBL_CMP_GT:
|
|
42
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] > b[i] ? 1.0 : 0.0;
|
|
43
|
+
return NUMBL_OK;
|
|
44
|
+
case NUMBL_CMP_GE:
|
|
45
|
+
for (size_t i = 0; i < n; i++) out[i] = a[i] >= b[i] ? 1.0 : 0.0;
|
|
46
|
+
return NUMBL_OK;
|
|
47
|
+
default:
|
|
48
|
+
return NUMBL_ERR_BAD_OP;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
int numbl_real_scalar_comparison(int op, size_t n,
|
|
53
|
+
double scalar, const double* arr,
|
|
54
|
+
int scalar_on_left, double* out) {
|
|
55
|
+
if (!arr || !out) return NUMBL_ERR_NULL_PTR;
|
|
56
|
+
if (op < 0 || op > NUMBL_CMP_GE) return NUMBL_ERR_BAD_OP;
|
|
57
|
+
if (scalar_on_left) {
|
|
58
|
+
for (size_t i = 0; i < n; i++) out[i] = rcmp(op, scalar, arr[i]);
|
|
59
|
+
} else {
|
|
60
|
+
for (size_t i = 0; i < n; i++) out[i] = rcmp(op, arr[i], scalar);
|
|
61
|
+
}
|
|
62
|
+
return NUMBL_OK;
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
int numbl_complex_comparison(int op, size_t n,
|
|
66
|
+
const double* a_re, const double* a_im,
|
|
67
|
+
const double* b_re, const double* b_im,
|
|
68
|
+
double* out) {
|
|
69
|
+
if (!a_re || !b_re || !out) return NUMBL_ERR_NULL_PTR;
|
|
70
|
+
switch (op) {
|
|
71
|
+
case NUMBL_CMP_EQ:
|
|
72
|
+
for (size_t i = 0; i < n; i++) {
|
|
73
|
+
double ar = a_re[i], ai = a_im ? a_im[i] : 0.0;
|
|
74
|
+
double br = b_re[i], bi = b_im ? b_im[i] : 0.0;
|
|
75
|
+
out[i] = (ar == br && ai == bi) ? 1.0 : 0.0;
|
|
76
|
+
}
|
|
77
|
+
return NUMBL_OK;
|
|
78
|
+
case NUMBL_CMP_NE:
|
|
79
|
+
for (size_t i = 0; i < n; i++) {
|
|
80
|
+
double ar = a_re[i], ai = a_im ? a_im[i] : 0.0;
|
|
81
|
+
double br = b_re[i], bi = b_im ? b_im[i] : 0.0;
|
|
82
|
+
out[i] = (ar != br || ai != bi) ? 1.0 : 0.0;
|
|
83
|
+
}
|
|
84
|
+
return NUMBL_OK;
|
|
85
|
+
/* MATLAB semantics: <, <=, >, >= compare real parts only. */
|
|
86
|
+
case NUMBL_CMP_LT:
|
|
87
|
+
for (size_t i = 0; i < n; i++) out[i] = a_re[i] < b_re[i] ? 1.0 : 0.0;
|
|
88
|
+
return NUMBL_OK;
|
|
89
|
+
case NUMBL_CMP_LE:
|
|
90
|
+
for (size_t i = 0; i < n; i++) out[i] = a_re[i] <= b_re[i] ? 1.0 : 0.0;
|
|
91
|
+
return NUMBL_OK;
|
|
92
|
+
case NUMBL_CMP_GT:
|
|
93
|
+
for (size_t i = 0; i < n; i++) out[i] = a_re[i] > b_re[i] ? 1.0 : 0.0;
|
|
94
|
+
return NUMBL_OK;
|
|
95
|
+
case NUMBL_CMP_GE:
|
|
96
|
+
for (size_t i = 0; i < n; i++) out[i] = a_re[i] >= b_re[i] ? 1.0 : 0.0;
|
|
97
|
+
return NUMBL_OK;
|
|
98
|
+
default:
|
|
99
|
+
return NUMBL_ERR_BAD_OP;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
int numbl_complex_scalar_comparison(int op, size_t n,
|
|
104
|
+
double s_re, double s_im,
|
|
105
|
+
const double* arr_re,
|
|
106
|
+
const double* arr_im,
|
|
107
|
+
int scalar_on_left, double* out) {
|
|
108
|
+
if (!arr_re || !out) return NUMBL_ERR_NULL_PTR;
|
|
109
|
+
switch (op) {
|
|
110
|
+
case NUMBL_CMP_EQ:
|
|
111
|
+
if (scalar_on_left) {
|
|
112
|
+
for (size_t i = 0; i < n; i++) {
|
|
113
|
+
double ar = arr_re[i], ai = arr_im ? arr_im[i] : 0.0;
|
|
114
|
+
out[i] = (s_re == ar && s_im == ai) ? 1.0 : 0.0;
|
|
115
|
+
}
|
|
116
|
+
} else {
|
|
117
|
+
for (size_t i = 0; i < n; i++) {
|
|
118
|
+
double ar = arr_re[i], ai = arr_im ? arr_im[i] : 0.0;
|
|
119
|
+
out[i] = (ar == s_re && ai == s_im) ? 1.0 : 0.0;
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
return NUMBL_OK;
|
|
123
|
+
case NUMBL_CMP_NE:
|
|
124
|
+
if (scalar_on_left) {
|
|
125
|
+
for (size_t i = 0; i < n; i++) {
|
|
126
|
+
double ar = arr_re[i], ai = arr_im ? arr_im[i] : 0.0;
|
|
127
|
+
out[i] = (s_re != ar || s_im != ai) ? 1.0 : 0.0;
|
|
128
|
+
}
|
|
129
|
+
} else {
|
|
130
|
+
for (size_t i = 0; i < n; i++) {
|
|
131
|
+
double ar = arr_re[i], ai = arr_im ? arr_im[i] : 0.0;
|
|
132
|
+
out[i] = (ar != s_re || ai != s_im) ? 1.0 : 0.0;
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
return NUMBL_OK;
|
|
136
|
+
/* Real-part-only comparisons. */
|
|
137
|
+
case NUMBL_CMP_LT:
|
|
138
|
+
case NUMBL_CMP_LE:
|
|
139
|
+
case NUMBL_CMP_GT:
|
|
140
|
+
case NUMBL_CMP_GE:
|
|
141
|
+
if (scalar_on_left) {
|
|
142
|
+
for (size_t i = 0; i < n; i++) out[i] = rcmp(op, s_re, arr_re[i]);
|
|
143
|
+
} else {
|
|
144
|
+
for (size_t i = 0; i < n; i++) out[i] = rcmp(op, arr_re[i], s_re);
|
|
145
|
+
}
|
|
146
|
+
return NUMBL_OK;
|
|
147
|
+
default:
|
|
148
|
+
return NUMBL_ERR_BAD_OP;
|
|
149
|
+
}
|
|
150
|
+
}
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Complex element-wise binary ops, split storage (op-code dispatch).
|
|
3
|
+
*
|
|
4
|
+
* Caller-allocated input/output buffers; never copies.
|
|
5
|
+
* a_im or b_im may be NULL → treat as zero.
|
|
6
|
+
* out_re and out_im are required.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include "numbl_ops.h"
|
|
10
|
+
|
|
11
|
+
/* Real part of (r + i*1j) / 0, following the JS-compatible convention:
 * NaN when the numerator is exactly zero, otherwise sign(r) / 0.0, which
 * is +Inf for r > 0, -Inf for r < 0 and NaN when r is zero or NaN. */
static inline double cdivz_re(double r, double i) {
    double numer;
    if (r == 0.0 && i == 0.0) {
        numer = 0.0;            /* 0/0 -> NaN */
    } else if (r > 0) {
        numer = 1.0;
    } else if (r < 0) {
        numer = -1.0;
    } else {
        numer = 0.0;
    }
    return numer / 0.0;
}
|
|
16
|
+
|
|
17
|
+
/* Imaginary part of (r + i*1j) / 0: exactly 0.0 when the numerator is
 * zero, otherwise sign(i) / 0.0, which is +Inf for i > 0, -Inf for i < 0
 * and NaN when i is zero or NaN. */
static inline double cdivz_im(double r, double i) {
    if (r == 0.0 && i == 0.0) {
        return 0.0;
    }
    double numer = 0.0;
    if (i > 0) {
        numer = 1.0;
    } else if (i < 0) {
        numer = -1.0;
    }
    return numer / 0.0;
}
|
|
22
|
+
|
|
23
|
+
int numbl_complex_binary_elemwise(int op, size_t n,
|
|
24
|
+
const double* a_re, const double* a_im,
|
|
25
|
+
const double* b_re, const double* b_im,
|
|
26
|
+
double* out_re, double* out_im) {
|
|
27
|
+
if (!a_re || !b_re || !out_re || !out_im) return NUMBL_ERR_NULL_PTR;
|
|
28
|
+
|
|
29
|
+
switch (op) {
|
|
30
|
+
case NUMBL_COMPLEX_BIN_ADD:
|
|
31
|
+
for (size_t i = 0; i < n; i++) {
|
|
32
|
+
out_re[i] = a_re[i] + b_re[i];
|
|
33
|
+
out_im[i] = (a_im ? a_im[i] : 0.0) + (b_im ? b_im[i] : 0.0);
|
|
34
|
+
}
|
|
35
|
+
return NUMBL_OK;
|
|
36
|
+
case NUMBL_COMPLEX_BIN_SUB:
|
|
37
|
+
for (size_t i = 0; i < n; i++) {
|
|
38
|
+
out_re[i] = a_re[i] - b_re[i];
|
|
39
|
+
out_im[i] = (a_im ? a_im[i] : 0.0) - (b_im ? b_im[i] : 0.0);
|
|
40
|
+
}
|
|
41
|
+
return NUMBL_OK;
|
|
42
|
+
case NUMBL_COMPLEX_BIN_MUL:
|
|
43
|
+
for (size_t i = 0; i < n; i++) {
|
|
44
|
+
double ar = a_re[i], ai = a_im ? a_im[i] : 0.0;
|
|
45
|
+
double br = b_re[i], bi = b_im ? b_im[i] : 0.0;
|
|
46
|
+
out_re[i] = ar * br - ai * bi;
|
|
47
|
+
out_im[i] = ar * bi + ai * br;
|
|
48
|
+
}
|
|
49
|
+
return NUMBL_OK;
|
|
50
|
+
case NUMBL_COMPLEX_BIN_DIV: {
|
|
51
|
+
/* Hoist the a_im / b_im presence tests out of the loop so the
|
|
52
|
+
* vectorizer sees a straight-line body. The main SIMD pass uses
|
|
53
|
+
* the naive formula (fast but wrong when br=bi=0, since 0/0
|
|
54
|
+
* produces NaN); a single sequential fix-up pass then restores
|
|
55
|
+
* the C99 "complex divide by zero" semantics on any positions
|
|
56
|
+
* where the denominator was zero. Fix-up is only entered when
|
|
57
|
+
* the main pass actually produced such a position.
|
|
58
|
+
*/
|
|
59
|
+
size_t zero_count = 0;
|
|
60
|
+
if (a_im && b_im) {
|
|
61
|
+
#pragma omp simd reduction(+:zero_count)
|
|
62
|
+
for (size_t i = 0; i < n; i++) {
|
|
63
|
+
double ar = a_re[i], ai = a_im[i];
|
|
64
|
+
double br = b_re[i], bi = b_im[i];
|
|
65
|
+
double denom = br * br + bi * bi;
|
|
66
|
+
double inv = 1.0 / denom;
|
|
67
|
+
out_re[i] = (ar * br + ai * bi) * inv;
|
|
68
|
+
out_im[i] = (ai * br - ar * bi) * inv;
|
|
69
|
+
zero_count += (denom == 0.0);
|
|
70
|
+
}
|
|
71
|
+
} else if (a_im) {
|
|
72
|
+
#pragma omp simd reduction(+:zero_count)
|
|
73
|
+
for (size_t i = 0; i < n; i++) {
|
|
74
|
+
double ar = a_re[i], ai = a_im[i];
|
|
75
|
+
double br = b_re[i];
|
|
76
|
+
double inv = 1.0 / br;
|
|
77
|
+
out_re[i] = ar * inv;
|
|
78
|
+
out_im[i] = ai * inv;
|
|
79
|
+
zero_count += (br == 0.0);
|
|
80
|
+
}
|
|
81
|
+
} else if (b_im) {
|
|
82
|
+
#pragma omp simd reduction(+:zero_count)
|
|
83
|
+
for (size_t i = 0; i < n; i++) {
|
|
84
|
+
double ar = a_re[i];
|
|
85
|
+
double br = b_re[i], bi = b_im[i];
|
|
86
|
+
double denom = br * br + bi * bi;
|
|
87
|
+
double inv = 1.0 / denom;
|
|
88
|
+
out_re[i] = (ar * br) * inv;
|
|
89
|
+
out_im[i] = (-ar * bi) * inv;
|
|
90
|
+
zero_count += (denom == 0.0);
|
|
91
|
+
}
|
|
92
|
+
} else {
|
|
93
|
+
#pragma omp simd reduction(+:zero_count)
|
|
94
|
+
for (size_t i = 0; i < n; i++) {
|
|
95
|
+
double ar = a_re[i];
|
|
96
|
+
double br = b_re[i];
|
|
97
|
+
out_re[i] = ar / br;
|
|
98
|
+
out_im[i] = 0.0;
|
|
99
|
+
zero_count += (br == 0.0);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
if (zero_count) {
|
|
103
|
+
for (size_t i = 0; i < n; i++) {
|
|
104
|
+
double br = b_re[i], bi = b_im ? b_im[i] : 0.0;
|
|
105
|
+
if (br == 0.0 && bi == 0.0) {
|
|
106
|
+
double ar = a_re[i], ai = a_im ? a_im[i] : 0.0;
|
|
107
|
+
out_re[i] = cdivz_re(ar, ai);
|
|
108
|
+
out_im[i] = cdivz_im(ar, ai);
|
|
109
|
+
}
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
return NUMBL_OK;
|
|
113
|
+
}
|
|
114
|
+
default:
|
|
115
|
+
return NUMBL_ERR_BAD_OP;
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
|
|
119
|
+
int numbl_complex_scalar_binary_elemwise(int op, size_t n,
|
|
120
|
+
double s_re, double s_im,
|
|
121
|
+
const double* arr_re,
|
|
122
|
+
const double* arr_im,
|
|
123
|
+
int scalar_on_left,
|
|
124
|
+
double* out_re, double* out_im) {
|
|
125
|
+
if (!arr_re || !out_re || !out_im) return NUMBL_ERR_NULL_PTR;
|
|
126
|
+
|
|
127
|
+
switch (op) {
|
|
128
|
+
case NUMBL_COMPLEX_BIN_ADD:
|
|
129
|
+
for (size_t i = 0; i < n; i++) {
|
|
130
|
+
out_re[i] = s_re + arr_re[i];
|
|
131
|
+
out_im[i] = s_im + (arr_im ? arr_im[i] : 0.0);
|
|
132
|
+
}
|
|
133
|
+
return NUMBL_OK;
|
|
134
|
+
case NUMBL_COMPLEX_BIN_SUB:
|
|
135
|
+
if (scalar_on_left) {
|
|
136
|
+
for (size_t i = 0; i < n; i++) {
|
|
137
|
+
out_re[i] = s_re - arr_re[i];
|
|
138
|
+
out_im[i] = s_im - (arr_im ? arr_im[i] : 0.0);
|
|
139
|
+
}
|
|
140
|
+
} else {
|
|
141
|
+
for (size_t i = 0; i < n; i++) {
|
|
142
|
+
out_re[i] = arr_re[i] - s_re;
|
|
143
|
+
out_im[i] = (arr_im ? arr_im[i] : 0.0) - s_im;
|
|
144
|
+
}
|
|
145
|
+
}
|
|
146
|
+
return NUMBL_OK;
|
|
147
|
+
case NUMBL_COMPLEX_BIN_MUL:
|
|
148
|
+
for (size_t i = 0; i < n; i++) {
|
|
149
|
+
double ar = arr_re[i];
|
|
150
|
+
double ai = arr_im ? arr_im[i] : 0.0;
|
|
151
|
+
out_re[i] = s_re * ar - s_im * ai;
|
|
152
|
+
out_im[i] = s_re * ai + s_im * ar;
|
|
153
|
+
}
|
|
154
|
+
return NUMBL_OK;
|
|
155
|
+
case NUMBL_COMPLEX_BIN_DIV:
|
|
156
|
+
if (scalar_on_left) {
|
|
157
|
+
for (size_t i = 0; i < n; i++) {
|
|
158
|
+
double ar = arr_re[i];
|
|
159
|
+
double ai = arr_im ? arr_im[i] : 0.0;
|
|
160
|
+
double denom = ar * ar + ai * ai;
|
|
161
|
+
if (denom == 0.0) {
|
|
162
|
+
out_re[i] = cdivz_re(s_re, s_im);
|
|
163
|
+
out_im[i] = cdivz_im(s_re, s_im);
|
|
164
|
+
} else {
|
|
165
|
+
out_re[i] = (s_re * ar + s_im * ai) / denom;
|
|
166
|
+
out_im[i] = (s_im * ar - s_re * ai) / denom;
|
|
167
|
+
}
|
|
168
|
+
}
|
|
169
|
+
} else {
|
|
170
|
+
double denom = s_re * s_re + s_im * s_im;
|
|
171
|
+
if (denom == 0.0) {
|
|
172
|
+
for (size_t i = 0; i < n; i++) {
|
|
173
|
+
double ar = arr_re[i];
|
|
174
|
+
double ai = arr_im ? arr_im[i] : 0.0;
|
|
175
|
+
out_re[i] = cdivz_re(ar, ai);
|
|
176
|
+
out_im[i] = cdivz_im(ar, ai);
|
|
177
|
+
}
|
|
178
|
+
} else {
|
|
179
|
+
double inv_denom = 1.0 / denom;
|
|
180
|
+
for (size_t i = 0; i < n; i++) {
|
|
181
|
+
double ar = arr_re[i];
|
|
182
|
+
double ai = arr_im ? arr_im[i] : 0.0;
|
|
183
|
+
out_re[i] = (ar * s_re + ai * s_im) * inv_denom;
|
|
184
|
+
out_im[i] = (ai * s_re - ar * s_im) * inv_denom;
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
return NUMBL_OK;
|
|
189
|
+
default:
|
|
190
|
+
return NUMBL_ERR_BAD_OP;
|
|
191
|
+
}
|
|
192
|
+
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Complex unary element-wise ops (op-code dispatch), split storage.
|
|
3
|
+
*
|
|
4
|
+
* Caller-allocated input/output buffers; never copies.
|
|
5
|
+
* a_im may be NULL → treat as zero.
|
|
6
|
+
* ABS is intentionally not supported here (output is real-valued; use
|
|
7
|
+
* numbl_complex_abs instead).
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#include "numbl_ops.h"
|
|
11
|
+
|
|
12
|
+
#include <complex.h>
|
|
13
|
+
#include <math.h>
|
|
14
|
+
|
|
15
|
+
/* Construct a double complex from split parts.
 *
 * The value is assembled component-wise rather than as `re + im * I`.
 * When `I` expands to `_Complex_I`, the product `im * I` has real part
 * `im * 0.0`, which is NaN for an infinite or NaN `im` and then poisons
 * the real component (cpack(1, Inf) would come out as NaN + Inf*i). The
 * arithmetic form also loses the sign of a negative-zero real part.
 * CMPLX (C11) avoids both problems; where the macro is not provided, the
 * parts are stored directly, relying on the guarantee that a complex type
 * has the same representation as an array of two reals. */
static inline double _Complex cpack(double re, double im) {
#ifdef CMPLX
    return CMPLX(re, im);
#else
    union {
        double _Complex z;
        double parts[2];
    } bits;
    bits.parts[0] = re;
    bits.parts[1] = im;
    return bits.z;
#endif
}
|
|
19
|
+
|
|
20
|
+
/* Store complex value `z` into split arrays at index `i`: the real part
 * goes to out_re_ptr[i], the imaginary part to out_im_ptr[i]. `z` is
 * evaluated exactly once, before either store. */
#define WRITE_C(out_re_ptr, out_im_ptr, i, z)                  \
    do {                                                       \
        const double _Complex write_c_tmp_ = (z);              \
        (out_re_ptr)[i] = creal(write_c_tmp_);                 \
        (out_im_ptr)[i] = cimag(write_c_tmp_);                 \
    } while (0)
|
|
25
|
+
|
|
26
|
+
/* complex sign: z/|z| for z != 0, else 0. Matches MATLAB's sign(z). */
|
|
27
|
+
static inline double _Complex csign(double _Complex z) {
|
|
28
|
+
double m = cabs(z);
|
|
29
|
+
if (m == 0.0) return 0.0;
|
|
30
|
+
return z / m;
|
|
31
|
+
}
|
|
32
|
+
|
|
33
|
+
int numbl_complex_unary_elemwise(int op, size_t n,
|
|
34
|
+
const double* a_re, const double* a_im,
|
|
35
|
+
double* out_re, double* out_im) {
|
|
36
|
+
if (!a_re || !out_re || !out_im) return NUMBL_ERR_NULL_PTR;
|
|
37
|
+
|
|
38
|
+
switch (op) {
|
|
39
|
+
case NUMBL_UNARY_EXP:
|
|
40
|
+
for (size_t i = 0; i < n; i++)
|
|
41
|
+
WRITE_C(out_re, out_im, i, cexp(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
42
|
+
return NUMBL_OK;
|
|
43
|
+
case NUMBL_UNARY_LOG:
|
|
44
|
+
for (size_t i = 0; i < n; i++)
|
|
45
|
+
WRITE_C(out_re, out_im, i, clog(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
46
|
+
return NUMBL_OK;
|
|
47
|
+
case NUMBL_UNARY_LOG2: {
|
|
48
|
+
const double inv_ln2 = 1.0 / log(2.0);
|
|
49
|
+
for (size_t i = 0; i < n; i++)
|
|
50
|
+
WRITE_C(out_re, out_im, i,
|
|
51
|
+
clog(cpack(a_re[i], a_im ? a_im[i] : 0.0)) * inv_ln2);
|
|
52
|
+
return NUMBL_OK;
|
|
53
|
+
}
|
|
54
|
+
case NUMBL_UNARY_LOG10: {
|
|
55
|
+
const double inv_ln10 = 1.0 / log(10.0);
|
|
56
|
+
for (size_t i = 0; i < n; i++)
|
|
57
|
+
WRITE_C(out_re, out_im, i,
|
|
58
|
+
clog(cpack(a_re[i], a_im ? a_im[i] : 0.0)) * inv_ln10);
|
|
59
|
+
return NUMBL_OK;
|
|
60
|
+
}
|
|
61
|
+
case NUMBL_UNARY_SQRT:
|
|
62
|
+
for (size_t i = 0; i < n; i++)
|
|
63
|
+
WRITE_C(out_re, out_im, i, csqrt(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
64
|
+
return NUMBL_OK;
|
|
65
|
+
case NUMBL_UNARY_ABS:
|
|
66
|
+
return NUMBL_ERR_BAD_OP; /* use numbl_complex_abs */
|
|
67
|
+
case NUMBL_UNARY_FLOOR:
|
|
68
|
+
for (size_t i = 0; i < n; i++) {
|
|
69
|
+
out_re[i] = floor(a_re[i]);
|
|
70
|
+
out_im[i] = a_im ? floor(a_im[i]) : 0.0;
|
|
71
|
+
}
|
|
72
|
+
return NUMBL_OK;
|
|
73
|
+
case NUMBL_UNARY_CEIL:
|
|
74
|
+
for (size_t i = 0; i < n; i++) {
|
|
75
|
+
out_re[i] = ceil(a_re[i]);
|
|
76
|
+
out_im[i] = a_im ? ceil(a_im[i]) : 0.0;
|
|
77
|
+
}
|
|
78
|
+
return NUMBL_OK;
|
|
79
|
+
case NUMBL_UNARY_ROUND:
|
|
80
|
+
for (size_t i = 0; i < n; i++) {
|
|
81
|
+
out_re[i] = round(a_re[i]);
|
|
82
|
+
out_im[i] = a_im ? round(a_im[i]) : 0.0;
|
|
83
|
+
}
|
|
84
|
+
return NUMBL_OK;
|
|
85
|
+
case NUMBL_UNARY_TRUNC:
|
|
86
|
+
for (size_t i = 0; i < n; i++) {
|
|
87
|
+
out_re[i] = trunc(a_re[i]);
|
|
88
|
+
out_im[i] = a_im ? trunc(a_im[i]) : 0.0;
|
|
89
|
+
}
|
|
90
|
+
return NUMBL_OK;
|
|
91
|
+
case NUMBL_UNARY_SIN:
|
|
92
|
+
for (size_t i = 0; i < n; i++)
|
|
93
|
+
WRITE_C(out_re, out_im, i, csin(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
94
|
+
return NUMBL_OK;
|
|
95
|
+
case NUMBL_UNARY_COS:
|
|
96
|
+
for (size_t i = 0; i < n; i++)
|
|
97
|
+
WRITE_C(out_re, out_im, i, ccos(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
98
|
+
return NUMBL_OK;
|
|
99
|
+
case NUMBL_UNARY_TAN:
|
|
100
|
+
for (size_t i = 0; i < n; i++)
|
|
101
|
+
WRITE_C(out_re, out_im, i, ctan(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
102
|
+
return NUMBL_OK;
|
|
103
|
+
case NUMBL_UNARY_ASIN:
|
|
104
|
+
for (size_t i = 0; i < n; i++)
|
|
105
|
+
WRITE_C(out_re, out_im, i, casin(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
106
|
+
return NUMBL_OK;
|
|
107
|
+
case NUMBL_UNARY_ACOS:
|
|
108
|
+
for (size_t i = 0; i < n; i++)
|
|
109
|
+
WRITE_C(out_re, out_im, i, cacos(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
110
|
+
return NUMBL_OK;
|
|
111
|
+
case NUMBL_UNARY_ATAN:
|
|
112
|
+
for (size_t i = 0; i < n; i++)
|
|
113
|
+
WRITE_C(out_re, out_im, i, catan(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
114
|
+
return NUMBL_OK;
|
|
115
|
+
case NUMBL_UNARY_SINH:
|
|
116
|
+
for (size_t i = 0; i < n; i++)
|
|
117
|
+
WRITE_C(out_re, out_im, i, csinh(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
118
|
+
return NUMBL_OK;
|
|
119
|
+
case NUMBL_UNARY_COSH:
|
|
120
|
+
for (size_t i = 0; i < n; i++)
|
|
121
|
+
WRITE_C(out_re, out_im, i, ccosh(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
122
|
+
return NUMBL_OK;
|
|
123
|
+
case NUMBL_UNARY_TANH:
|
|
124
|
+
for (size_t i = 0; i < n; i++)
|
|
125
|
+
WRITE_C(out_re, out_im, i, ctanh(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
126
|
+
return NUMBL_OK;
|
|
127
|
+
case NUMBL_UNARY_SIGN:
|
|
128
|
+
for (size_t i = 0; i < n; i++)
|
|
129
|
+
WRITE_C(out_re, out_im, i,
|
|
130
|
+
csign(cpack(a_re[i], a_im ? a_im[i] : 0.0)));
|
|
131
|
+
return NUMBL_OK;
|
|
132
|
+
default:
|
|
133
|
+
return NUMBL_ERR_BAD_OP;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
int numbl_complex_abs(size_t n,
|
|
138
|
+
const double* a_re, const double* a_im,
|
|
139
|
+
double* out) {
|
|
140
|
+
if (!a_re || !out) return NUMBL_ERR_NULL_PTR;
|
|
141
|
+
if (a_im) {
|
|
142
|
+
#pragma omp simd
|
|
143
|
+
for (size_t i = 0; i < n; i++) {
|
|
144
|
+
/* hypot avoids overflow/underflow that (re*re + im*im) would miss. */
|
|
145
|
+
out[i] = hypot(a_re[i], a_im[i]);
|
|
146
|
+
}
|
|
147
|
+
} else {
|
|
148
|
+
#pragma omp simd
|
|
149
|
+
for (size_t i = 0; i < n; i++) out[i] = fabs(a_re[i]);
|
|
150
|
+
}
|
|
151
|
+
return NUMBL_OK;
|
|
152
|
+
}
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* numbl_ops — shared bits: error strings, op-code dump.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
#include "numbl_ops.h"
|
|
6
|
+
|
|
7
|
+
#include <stdio.h>
|
|
8
|
+
#include <string.h>
|
|
9
|
+
|
|
10
|
+
const char* numbl_strerror(int code) {
|
|
11
|
+
switch (code) {
|
|
12
|
+
case NUMBL_OK: return "ok";
|
|
13
|
+
case NUMBL_ERR_BAD_OP: return "unknown op code";
|
|
14
|
+
case NUMBL_ERR_NULL_PTR: return "null pointer argument";
|
|
15
|
+
default: return "unknown numbl error";
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
/* Dump op-code enum values in a stable text format.
|
|
20
|
+
* Used by the TS-side drift-detection test.
|
|
21
|
+
*/
|
|
22
|
+
size_t numbl_dump_op_codes(char* buf, size_t buf_size) {
|
|
23
|
+
/* Compose into a temporary; emit length only if buf is too small. */
|
|
24
|
+
char tmp[1024];
|
|
25
|
+
int n = 0;
|
|
26
|
+
n += snprintf(tmp + n, sizeof(tmp) - n,
|
|
27
|
+
"real_binary:ADD=%d,SUB=%d,MUL=%d,DIV=%d;",
|
|
28
|
+
NUMBL_REAL_BIN_ADD, NUMBL_REAL_BIN_SUB,
|
|
29
|
+
NUMBL_REAL_BIN_MUL, NUMBL_REAL_BIN_DIV);
|
|
30
|
+
n += snprintf(tmp + n, sizeof(tmp) - n,
|
|
31
|
+
"complex_binary:ADD=%d,SUB=%d,MUL=%d,DIV=%d;",
|
|
32
|
+
NUMBL_COMPLEX_BIN_ADD, NUMBL_COMPLEX_BIN_SUB,
|
|
33
|
+
NUMBL_COMPLEX_BIN_MUL, NUMBL_COMPLEX_BIN_DIV);
|
|
34
|
+
n += snprintf(tmp + n, sizeof(tmp) - n,
|
|
35
|
+
"unary:EXP=%d,LOG=%d,LOG2=%d,LOG10=%d,SQRT=%d,ABS=%d,"
|
|
36
|
+
"FLOOR=%d,CEIL=%d,ROUND=%d,TRUNC=%d,"
|
|
37
|
+
"SIN=%d,COS=%d,TAN=%d,ASIN=%d,ACOS=%d,ATAN=%d,"
|
|
38
|
+
"SINH=%d,COSH=%d,TANH=%d,SIGN=%d;",
|
|
39
|
+
NUMBL_UNARY_EXP, NUMBL_UNARY_LOG, NUMBL_UNARY_LOG2,
|
|
40
|
+
NUMBL_UNARY_LOG10, NUMBL_UNARY_SQRT, NUMBL_UNARY_ABS,
|
|
41
|
+
NUMBL_UNARY_FLOOR, NUMBL_UNARY_CEIL, NUMBL_UNARY_ROUND,
|
|
42
|
+
NUMBL_UNARY_TRUNC, NUMBL_UNARY_SIN, NUMBL_UNARY_COS,
|
|
43
|
+
NUMBL_UNARY_TAN, NUMBL_UNARY_ASIN, NUMBL_UNARY_ACOS,
|
|
44
|
+
NUMBL_UNARY_ATAN, NUMBL_UNARY_SINH, NUMBL_UNARY_COSH,
|
|
45
|
+
NUMBL_UNARY_TANH, NUMBL_UNARY_SIGN);
|
|
46
|
+
n += snprintf(tmp + n, sizeof(tmp) - n,
|
|
47
|
+
"cmp:EQ=%d,NE=%d,LT=%d,LE=%d,GT=%d,GE=%d;",
|
|
48
|
+
NUMBL_CMP_EQ, NUMBL_CMP_NE, NUMBL_CMP_LT,
|
|
49
|
+
NUMBL_CMP_LE, NUMBL_CMP_GT, NUMBL_CMP_GE);
|
|
50
|
+
n += snprintf(tmp + n, sizeof(tmp) - n,
|
|
51
|
+
"reduce:SUM=%d,PROD=%d,MAX=%d,MIN=%d,ANY=%d,ALL=%d,MEAN=%d;",
|
|
52
|
+
NUMBL_REDUCE_SUM, NUMBL_REDUCE_PROD, NUMBL_REDUCE_MAX,
|
|
53
|
+
NUMBL_REDUCE_MIN, NUMBL_REDUCE_ANY, NUMBL_REDUCE_ALL,
|
|
54
|
+
NUMBL_REDUCE_MEAN);
|
|
55
|
+
n += snprintf(tmp + n, sizeof(tmp) - n,
|
|
56
|
+
"bessel:J=%d,Y=%d,I=%d,K=%d;",
|
|
57
|
+
NUMBL_BESSEL_J, NUMBL_BESSEL_Y,
|
|
58
|
+
NUMBL_BESSEL_I, NUMBL_BESSEL_K);
|
|
59
|
+
size_t need = (size_t)n;
|
|
60
|
+
if (buf && buf_size > need) {
|
|
61
|
+
memcpy(buf, tmp, need + 1);
|
|
62
|
+
} else if (buf && buf_size > 0) {
|
|
63
|
+
buf[0] = '\0';
|
|
64
|
+
}
|
|
65
|
+
return need;
|
|
66
|
+
}
|