numbl 0.1.7 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. package/binding.gyp +59 -3
  2. package/dist-cli/cli.js +22538 -7936
  3. package/dist-lib/lib.js +34682 -20852
  4. package/dist-lib/numbl-core/executeCode.d.ts +13 -0
  5. package/dist-lib/numbl-core/fileIOAdapter.d.ts +2 -0
  6. package/dist-lib/numbl-core/helpers/reduction-helpers.d.ts +7 -2
  7. package/dist-lib/numbl-core/interpreter/builtins/datetime.d.ts +39 -0
  8. package/dist-lib/numbl-core/interpreter/builtins/index.d.ts +1 -0
  9. package/dist-lib/numbl-core/interpreter/builtins/time-system.d.ts +1 -0
  10. package/dist-lib/numbl-core/interpreter/builtins/types.d.ts +96 -5
  11. package/dist-lib/numbl-core/interpreter/interpreter.d.ts +41 -3
  12. package/dist-lib/numbl-core/interpreter/types.d.ts +1 -1
  13. package/dist-lib/numbl-core/jit/c/abi.d.ts +90 -0
  14. package/dist-lib/numbl-core/jit/c/assemble.d.ts +56 -0
  15. package/dist-lib/numbl-core/jit/c/classify.d.ts +70 -0
  16. package/dist-lib/numbl-core/jit/c/compile.d.ts +37 -0
  17. package/dist-lib/numbl-core/jit/c/context.d.ts +152 -0
  18. package/dist-lib/numbl-core/jit/c/emit/assign.d.ts +20 -0
  19. package/dist-lib/numbl-core/jit/c/emit/complexScalar.d.ts +18 -0
  20. package/dist-lib/numbl-core/jit/c/emit/fused.d.ts +42 -0
  21. package/dist-lib/numbl-core/jit/c/emit/helpers.d.ts +40 -0
  22. package/dist-lib/numbl-core/jit/c/emit/index.d.ts +14 -0
  23. package/dist-lib/numbl-core/jit/c/emit/scalar.d.ts +23 -0
  24. package/dist-lib/numbl-core/jit/c/emit/stmt.d.ts +25 -0
  25. package/dist-lib/numbl-core/jit/c/emit/tensor.d.ts +127 -0
  26. package/dist-lib/numbl-core/jit/c/emit/userCall.d.ts +58 -0
  27. package/dist-lib/numbl-core/jit/c/epilogue.d.ts +26 -0
  28. package/dist-lib/numbl-core/jit/c/feasibility.d.ts +44 -0
  29. package/dist-lib/numbl-core/jit/c/prelude.d.ts +37 -0
  30. package/dist-lib/numbl-core/jit/c/visit.d.ts +63 -0
  31. package/dist-lib/numbl-core/jit/e1/complexKernelEmit.d.ts +46 -0
  32. package/dist-lib/numbl-core/jit/e1/hash.d.ts +10 -0
  33. package/dist-lib/numbl-core/jit/e1/install.d.ts +13 -0
  34. package/dist-lib/numbl-core/jit/e1/kernelEmit.d.ts +54 -0
  35. package/dist-lib/numbl-core/jit/e1/multiReductionKernel.d.ts +66 -0
  36. package/dist-lib/numbl-core/jit/e1/openmpFlag.d.ts +13 -0
  37. package/dist-lib/numbl-core/jit/e1/scalarFnKernel.d.ts +44 -0
  38. package/dist-lib/numbl-core/jit/e2/assignKernel.d.ts +34 -0
  39. package/dist-lib/numbl-core/jit/e2/astToJitExpr.d.ts +25 -0
  40. package/dist-lib/numbl-core/jit/e2/cache.d.ts +80 -0
  41. package/dist-lib/numbl-core/jit/e2/chainKernelEmit.d.ts +55 -0
  42. package/dist-lib/numbl-core/jit/e2/classify.d.ts +119 -0
  43. package/dist-lib/numbl-core/jit/e2/compileFn.d.ts +16 -0
  44. package/dist-lib/numbl-core/jit/e2/complexChainKernelEmit.d.ts +79 -0
  45. package/dist-lib/numbl-core/jit/e2/emitShared.d.ts +71 -0
  46. package/dist-lib/numbl-core/jit/e2/install.d.ts +11 -0
  47. package/dist-lib/numbl-core/jit/e2/liveness.d.ts +29 -0
  48. package/dist-lib/numbl-core/jit/e2/loopKernel.d.ts +49 -0
  49. package/dist-lib/numbl-core/jit/e2/loopKernelEmit.d.ts +75 -0
  50. package/dist-lib/numbl-core/jit/e2/multiReductionDriver.d.ts +24 -0
  51. package/dist-lib/numbl-core/jit/e2/reductionKernelEmit.d.ts +72 -0
  52. package/dist-lib/numbl-core/jit/e2/scalarFnDriver.d.ts +29 -0
  53. package/dist-lib/numbl-core/jit/fusedChainHelpers.d.ts +65 -0
  54. package/dist-lib/numbl-core/jit/fusedScalarEmit.d.ts +69 -0
  55. package/dist-lib/numbl-core/jit/fusion.d.ts +71 -0
  56. package/dist-lib/numbl-core/jit/fusionOps.d.ts +25 -0
  57. package/dist-lib/numbl-core/jit/heavyOps.d.ts +15 -0
  58. package/dist-lib/numbl-core/{interpreter/jit → jit}/index.d.ts +2 -2
  59. package/dist-lib/numbl-core/jit/jitBailSafety.d.ts +41 -0
  60. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoop.d.ts +2 -2
  61. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLoopAnalysis.d.ts +6 -1
  62. package/dist-lib/numbl-core/jit/jitLower.d.ts +122 -0
  63. package/dist-lib/numbl-core/jit/jitLowerExpr.d.ts +27 -0
  64. package/dist-lib/numbl-core/jit/jitLowerStmt.d.ts +9 -0
  65. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitLowerTypes.d.ts +7 -3
  66. package/dist-lib/numbl-core/jit/jitTopLevel.d.ts +22 -0
  67. package/dist-lib/numbl-core/{interpreter/jit → jit}/jitTypes.d.ts +133 -1
  68. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegen.d.ts +2 -2
  69. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitCodegenHoist.d.ts +19 -1
  70. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpers.d.ts +15 -3
  71. package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersIndex.d.ts +7 -0
  72. package/dist-lib/numbl-core/jit/js/jitHelpersTensor.d.ts +34 -0
  73. package/dist-lib/numbl-core/jit/js/jsFusedCodegen.d.ts +17 -0
  74. package/dist-lib/numbl-core/jit/js/jsMultiReduction.d.ts +70 -0
  75. package/dist-lib/numbl-core/jit/scalarEmit.d.ts +58 -0
  76. package/dist-lib/numbl-core/lexer/types.d.ts +2 -1
  77. package/dist-lib/numbl-core/native/lapack-bridge.d.ts +39 -1
  78. package/dist-lib/numbl-core/ops/bessel.d.ts +18 -0
  79. package/dist-lib/numbl-core/ops/comparison.d.ts +11 -0
  80. package/dist-lib/numbl-core/ops/complexBinaryElemwise.d.ts +10 -0
  81. package/dist-lib/numbl-core/ops/complexUnaryElemwise.d.ts +8 -0
  82. package/dist-lib/numbl-core/ops/dispatch.d.ts +26 -0
  83. package/dist-lib/numbl-core/ops/index.d.ts +8 -0
  84. package/dist-lib/numbl-core/ops/opCodes.d.ts +70 -0
  85. package/dist-lib/numbl-core/ops/realBinaryElemwise.d.ts +8 -0
  86. package/dist-lib/numbl-core/ops/realUnaryElemwise.d.ts +5 -0
  87. package/dist-lib/numbl-core/ops/reduce.d.ts +6 -0
  88. package/dist-lib/numbl-core/parser/types.d.ts +6 -0
  89. package/dist-lib/numbl-core/runtime/alloc.d.ts +23 -0
  90. package/dist-lib/numbl-core/runtime/runtime.d.ts +1 -0
  91. package/dist-lib/numbl-core/version.d.ts +1 -1
  92. package/native/jit_runtime/jit_runtime.c +261 -0
  93. package/native/jit_runtime/jit_runtime.h +204 -0
  94. package/native/numbl_addon.cpp +62 -1
  95. package/native/ops/bessel.c +572 -0
  96. package/native/ops/comparison.c +150 -0
  97. package/native/ops/complex_binary_elemwise.c +192 -0
  98. package/native/ops/complex_unary_elemwise.c +152 -0
  99. package/native/ops/numbl_ops.c +66 -0
  100. package/native/ops/numbl_ops.h +262 -0
  101. package/native/ops/real_binary_elemwise.c +85 -0
  102. package/native/ops/real_unary_elemwise.c +104 -0
  103. package/native/ops/reduce.c +162 -0
  104. package/native/ops_napi.cpp +320 -0
  105. package/package.json +8 -9
  106. package/dist-lib/numbl-core/interpreter/jit/jitHelpersTensor.d.ts +0 -28
  107. package/dist-lib/numbl-core/interpreter/jit/jitLower.d.ts +0 -23
  108. /package/dist-lib/numbl-core/{interpreter/jit → jit/js}/jitHelpersComplex.d.ts +0 -0
@@ -0,0 +1,262 @@
1
+ /**
2
+ * numbl_ops — pure C tensor-ops library.
3
+ *
4
+ * Stable C ABI for tensor operations dispatched by integer op-code.
5
+ * Caller owns input AND output memory; functions never allocate output.
6
+ *
7
+ * Mirrored 1:1 by the TS implementation in src/numbl-core/ops/.
8
+ * The N-API addon thin-wraps these entry points; the future C-JIT links
9
+ * directly against the same library.
10
+ *
11
+ * All numeric data is column-major (Fortran/MATLAB) double precision.
12
+ * Complex tensors use split storage: separate `re` and `im` Float64 buffers.
13
+ * For complex inputs, `im` may be NULL to indicate "all zero".
14
+ *
15
+ * Return codes: 0 on success, negative on error (see numbl_strerror).
16
+ */
17
+
18
+ #ifndef NUMBL_OPS_H
19
+ #define NUMBL_OPS_H
20
+
21
+ #include <stddef.h>
22
+
23
+ #ifdef __cplusplus
24
+ extern "C" {
25
+ #endif
26
+
27
+ /* ── Error codes ──────────────────────────────────────────────────────── */
28
+
29
+ #define NUMBL_OK 0
30
+ #define NUMBL_ERR_BAD_OP -1
31
+ #define NUMBL_ERR_NULL_PTR -2
32
+
33
+ /** Human-readable message for a numbl error code. */
34
+ const char* numbl_strerror(int code);
35
+
36
+ /* ── Real binary element-wise ops ─────────────────────────────────────── */
37
+
38
+ typedef enum {
39
+ NUMBL_REAL_BIN_ADD = 0,
40
+ NUMBL_REAL_BIN_SUB = 1,
41
+ NUMBL_REAL_BIN_MUL = 2,
42
+ NUMBL_REAL_BIN_DIV = 3
43
+ } numbl_real_bin_op_t;
44
+
45
+ /**
46
+ * out[i] = a[i] OP b[i] for i in [0, n).
47
+ * a, b, out must each point to at least n doubles.
48
+ */
49
+ int numbl_real_binary_elemwise(int op, size_t n,
50
+ const double* a,
51
+ const double* b,
52
+ double* out);
53
+
54
+ /**
55
+ * Scalar-tensor variant.
56
+ * If scalar_on_left: out[i] = scalar OP arr[i]
57
+ * Else: out[i] = arr[i] OP scalar
58
+ */
59
+ int numbl_real_scalar_binary_elemwise(int op, size_t n,
60
+ double scalar,
61
+ const double* arr,
62
+ int scalar_on_left,
63
+ double* out);
64
+
65
+ /* ── Complex binary element-wise ops ──────────────────────────────────── */
66
+
67
+ typedef enum {
68
+ NUMBL_COMPLEX_BIN_ADD = 0,
69
+ NUMBL_COMPLEX_BIN_SUB = 1,
70
+ NUMBL_COMPLEX_BIN_MUL = 2,
71
+ NUMBL_COMPLEX_BIN_DIV = 3
72
+ } numbl_complex_bin_op_t;
73
+
74
+ /**
75
+ * Complex element-wise binary op, split storage.
76
+ * a_im or b_im may be NULL → treat as zero (mixed real/complex).
77
+ * out_re and out_im are required (caller allocates both, even if result is real).
78
+ */
79
+ int numbl_complex_binary_elemwise(int op, size_t n,
80
+ const double* a_re, const double* a_im,
81
+ const double* b_re, const double* b_im,
82
+ double* out_re, double* out_im);
83
+
84
+ /**
85
+ * Complex-scalar / tensor variant.
86
+ * arr_im may be NULL → treat tensor as purely real.
87
+ * If scalar_on_left: out[i] = scalar OP arr[i]
88
+ * Else: out[i] = arr[i] OP scalar
89
+ */
90
+ int numbl_complex_scalar_binary_elemwise(int op, size_t n,
91
+ double s_re, double s_im,
92
+ const double* arr_re,
93
+ const double* arr_im,
94
+ int scalar_on_left,
95
+ double* out_re, double* out_im);
96
+
97
+ /* ── Unary element-wise ops (real + complex) ──────────────────────────── */
98
+
99
+ typedef enum {
100
+ NUMBL_UNARY_EXP = 0,
101
+ NUMBL_UNARY_LOG = 1,
102
+ NUMBL_UNARY_LOG2 = 2,
103
+ NUMBL_UNARY_LOG10 = 3,
104
+ NUMBL_UNARY_SQRT = 4,
105
+ NUMBL_UNARY_ABS = 5,
106
+ NUMBL_UNARY_FLOOR = 6,
107
+ NUMBL_UNARY_CEIL = 7,
108
+ NUMBL_UNARY_ROUND = 8,
109
+ NUMBL_UNARY_TRUNC = 9,
110
+ NUMBL_UNARY_SIN = 10,
111
+ NUMBL_UNARY_COS = 11,
112
+ NUMBL_UNARY_TAN = 12,
113
+ NUMBL_UNARY_ASIN = 13,
114
+ NUMBL_UNARY_ACOS = 14,
115
+ NUMBL_UNARY_ATAN = 15,
116
+ NUMBL_UNARY_SINH = 16,
117
+ NUMBL_UNARY_COSH = 17,
118
+ NUMBL_UNARY_TANH = 18,
119
+ NUMBL_UNARY_SIGN = 19
120
+ } numbl_unary_op_t;
121
+
122
+ /**
123
+ * Real unary element-wise: out[i] = OP(a[i]).
124
+ * a, out must point to at least n doubles.
125
+ */
126
+ int numbl_real_unary_elemwise(int op, size_t n,
127
+ const double* a, double* out);
128
+
129
+ /**
130
+ * Complex unary element-wise, split storage.
131
+ * a_im may be NULL → treat as zero.
132
+ * out_re and out_im are both required.
133
+ *
134
+ * ABS is unsupported here (output would be real-valued — use
135
+ * numbl_complex_abs for that). Other ops are supported.
136
+ */
137
+ int numbl_complex_unary_elemwise(int op, size_t n,
138
+ const double* a_re, const double* a_im,
139
+ double* out_re, double* out_im);
140
+
141
+ /**
142
+ * Complex absolute value: out[i] = sqrt(re[i]^2 + im[i]^2).
143
+ * Output is real-valued. a_im may be NULL.
144
+ */
145
+ int numbl_complex_abs(size_t n,
146
+ const double* a_re, const double* a_im,
147
+ double* out);
148
+
149
+ /* ── Comparisons (logical output stored as 0.0/1.0 in double buffer) ──── */
150
+
151
+ typedef enum {
152
+ NUMBL_CMP_EQ = 0, /* == */
153
+ NUMBL_CMP_NE = 1, /* != */
154
+ NUMBL_CMP_LT = 2, /* < */
155
+ NUMBL_CMP_LE = 3, /* <= */
156
+ NUMBL_CMP_GT = 4, /* > */
157
+ NUMBL_CMP_GE = 5 /* >= */
158
+ } numbl_cmp_op_t;
159
+
160
+ /**
161
+ * Real comparison: out[i] = (a[i] OP b[i]) ? 1.0 : 0.0.
162
+ * NaN in either operand yields 0 (except for !=, which yields 1).
163
+ */
164
+ int numbl_real_comparison(int op, size_t n,
165
+ const double* a, const double* b,
166
+ double* out);
167
+
168
+ /** Scalar-tensor variant. */
169
+ int numbl_real_scalar_comparison(int op, size_t n,
170
+ double scalar, const double* arr,
171
+ int scalar_on_left, double* out);
172
+
173
+ /**
174
+ * Complex comparison. EQ / NE compare both parts; LT/LE/GT/GE compare
175
+ * only the real parts (MATLAB semantics). a_im, b_im may be NULL.
176
+ */
177
+ int numbl_complex_comparison(int op, size_t n,
178
+ const double* a_re, const double* a_im,
179
+ const double* b_re, const double* b_im,
180
+ double* out);
181
+
182
+ /** Complex-scalar / tensor comparison. */
183
+ int numbl_complex_scalar_comparison(int op, size_t n,
184
+ double s_re, double s_im,
185
+ const double* arr_re,
186
+ const double* arr_im,
187
+ int scalar_on_left, double* out);
188
+
189
+ /* ── Flat reductions (reduce entire buffer to a single value) ─────────── */
190
+
191
+ typedef enum {
192
+ NUMBL_REDUCE_SUM = 0,
193
+ NUMBL_REDUCE_PROD = 1,
194
+ NUMBL_REDUCE_MAX = 2,
195
+ NUMBL_REDUCE_MIN = 3,
196
+ NUMBL_REDUCE_ANY = 4, /* result: 1 if any nonzero, else 0 */
197
+ NUMBL_REDUCE_ALL = 5, /* result: 1 if all nonzero, else 0 */
198
+ NUMBL_REDUCE_MEAN = 6
199
+ } numbl_reduce_op_t;
200
+
201
+ /**
202
+ * Flat real reduction: reduce a contiguous real buffer to a single value.
203
+ * Empty buffer: MAX/MIN/MEAN → NaN (SUM → 0, PROD → 1, ANY → 0, ALL → 1).
204
+ * NaN handling: NaN propagates for SUM/PROD/MEAN (omit-NaN variants could be
205
+ * added later as separate op codes). MAX/MIN skip NaN elements and return NaN only if every element is NaN. ANY/ALL treat NaN as nonzero.
206
+ */
207
+ int numbl_real_flat_reduce(int op, size_t n, const double* a, double* out);
208
+
209
+ /**
210
+ * Flat complex reduction: SUM, PROD, ANY, ALL supported. MAX/MIN/MEAN
211
+ * return NUMBL_ERR_BAD_OP (use real-only for those). a_im may be NULL.
212
+ * For SUM/PROD, both out_re and out_im must be non-NULL.
213
+ * For ANY/ALL, out_re receives 0/1 and out_im is untouched (may be NULL).
214
+ */
215
+ int numbl_complex_flat_reduce(int op, size_t n,
216
+ const double* a_re, const double* a_im,
217
+ double* out_re, double* out_im);
218
+
219
+ /* ── Bessel functions (real input) ────────────────────────────────────── */
220
+
221
+ typedef enum {
222
+ NUMBL_BESSEL_J = 0, /* first kind, real output */
223
+ NUMBL_BESSEL_Y = 1, /* second kind, real output */
224
+ NUMBL_BESSEL_I = 2, /* modified first kind, real output */
225
+ NUMBL_BESSEL_K = 3 /* modified second kind, real output */
226
+ } numbl_bessel_real_op_t;
227
+
228
+ /**
229
+ * Real-valued Bessel: out[i] = bessel<OP>(nu, z[i]).
230
+ * nu is a fixed scalar order. z, out must each point to at least n doubles.
231
+ * scale: 0 unscaled; 1 applies the scaled variant (J,Y,I: exp(-|z|); K: exp(z)).
232
+ */
233
+ int numbl_bessel_real(int op, double nu, size_t n,
234
+ const double* z, int scale, double* out);
235
+
236
+ /**
237
+ * Hankel function H_nu^{(k)}(z) with real z.
238
+ * k_kind = 1 → out[i] = J_nu(z[i]) + i * Y_nu(z[i])
239
+ * k_kind = 2 → out[i] = J_nu(z[i]) - i * Y_nu(z[i])
240
+ * Split complex output: out_re and out_im must each point to at least n doubles.
241
+ * scale: 0 unscaled; 1 multiplies output by exp(-i*z) (k=1) or exp(+i*z) (k=2).
242
+ */
243
+ int numbl_bessel_h(int k_kind, double nu, size_t n,
244
+ const double* z, int scale,
245
+ double* out_re, double* out_im);
246
+
247
+ /* ── Op-code dump (for drift detection) ───────────────────────────────── */
248
+
249
+ /**
250
+ * Writes a small C string describing the op-code enum values into buf.
251
+ * Format is a JSON-like string of category=op_name=value triples.
252
+ * Returns the number of bytes that would be written (excluding terminator).
253
+ * If buf is NULL or buf_size is too small, nothing is written but the
254
+ * required size is still returned.
255
+ */
256
+ size_t numbl_dump_op_codes(char* buf, size_t buf_size);
257
+
258
+ #ifdef __cplusplus
259
+ }
260
+ #endif
261
+
262
+ #endif /* NUMBL_OPS_H */
@@ -0,0 +1,85 @@
1
+ /**
2
+ * Real element-wise binary ops (op-code dispatch).
3
+ *
4
+ * Caller-allocated input/output buffers; never copies.
5
+ */
6
+
7
+ #include "numbl_ops.h"
8
+
9
+ int numbl_real_binary_elemwise(int op, size_t n,
10
+ const double* a,
11
+ const double* b,
12
+ double* out) {
13
+ if (!a || !b || !out) return NUMBL_ERR_NULL_PTR;
14
+ switch (op) {
15
+ case NUMBL_REAL_BIN_ADD:
16
+ #pragma omp simd
17
+ for (size_t i = 0; i < n; i++) out[i] = a[i] + b[i];
18
+ return NUMBL_OK;
19
+ case NUMBL_REAL_BIN_SUB:
20
+ #pragma omp simd
21
+ for (size_t i = 0; i < n; i++) out[i] = a[i] - b[i];
22
+ return NUMBL_OK;
23
+ case NUMBL_REAL_BIN_MUL:
24
+ #pragma omp simd
25
+ for (size_t i = 0; i < n; i++) out[i] = a[i] * b[i];
26
+ return NUMBL_OK;
27
+ case NUMBL_REAL_BIN_DIV:
28
+ #pragma omp simd
29
+ for (size_t i = 0; i < n; i++) out[i] = a[i] / b[i];
30
+ return NUMBL_OK;
31
+ default:
32
+ return NUMBL_ERR_BAD_OP;
33
+ }
34
+ }
35
+
36
+ int numbl_real_scalar_binary_elemwise(int op, size_t n,
37
+ double scalar,
38
+ const double* arr,
39
+ int scalar_on_left,
40
+ double* out) {
41
+ if (!arr || !out) return NUMBL_ERR_NULL_PTR;
42
+ if (scalar_on_left) {
43
+ switch (op) {
44
+ case NUMBL_REAL_BIN_ADD:
45
+ #pragma omp simd
46
+ for (size_t i = 0; i < n; i++) out[i] = scalar + arr[i];
47
+ return NUMBL_OK;
48
+ case NUMBL_REAL_BIN_SUB:
49
+ #pragma omp simd
50
+ for (size_t i = 0; i < n; i++) out[i] = scalar - arr[i];
51
+ return NUMBL_OK;
52
+ case NUMBL_REAL_BIN_MUL:
53
+ #pragma omp simd
54
+ for (size_t i = 0; i < n; i++) out[i] = scalar * arr[i];
55
+ return NUMBL_OK;
56
+ case NUMBL_REAL_BIN_DIV:
57
+ #pragma omp simd
58
+ for (size_t i = 0; i < n; i++) out[i] = scalar / arr[i];
59
+ return NUMBL_OK;
60
+ default:
61
+ return NUMBL_ERR_BAD_OP;
62
+ }
63
+ } else {
64
+ switch (op) {
65
+ case NUMBL_REAL_BIN_ADD:
66
+ #pragma omp simd
67
+ for (size_t i = 0; i < n; i++) out[i] = arr[i] + scalar;
68
+ return NUMBL_OK;
69
+ case NUMBL_REAL_BIN_SUB:
70
+ #pragma omp simd
71
+ for (size_t i = 0; i < n; i++) out[i] = arr[i] - scalar;
72
+ return NUMBL_OK;
73
+ case NUMBL_REAL_BIN_MUL:
74
+ #pragma omp simd
75
+ for (size_t i = 0; i < n; i++) out[i] = arr[i] * scalar;
76
+ return NUMBL_OK;
77
+ case NUMBL_REAL_BIN_DIV:
78
+ #pragma omp simd
79
+ for (size_t i = 0; i < n; i++) out[i] = arr[i] / scalar;
80
+ return NUMBL_OK;
81
+ default:
82
+ return NUMBL_ERR_BAD_OP;
83
+ }
84
+ }
85
+ }
@@ -0,0 +1,104 @@
1
+ /**
2
+ * Real unary element-wise ops (op-code dispatch).
3
+ * Caller-allocated input/output buffers; never copies.
4
+ */
5
+
6
+ #include "numbl_ops.h"
7
+
8
+ #include <math.h>
9
+
10
/* Sign of x as a double: 1.0 for positive, -1.0 for negative, 0.0 otherwise.
 * "Otherwise" covers +0.0, -0.0 and any value for which neither comparison
 * holds (a NaN under IEEE comparison rules). */
static inline double rsign(double x) {
    if (x > 0.0) {
        return 1.0;
    }
    if (x < 0.0) {
        return -1.0;
    }
    return 0.0;
}
13
+
14
+ int numbl_real_unary_elemwise(int op, size_t n,
15
+ const double* a, double* out) {
16
+ if (!a || !out) return NUMBL_ERR_NULL_PTR;
17
+ // `#pragma omp simd` + `-fopenmp-simd -fno-math-errno -ffast-math`
18
+ // lets gcc emit calls to libmvec's vector math (`_ZGVdN4v_exp`, etc.)
19
+ // instead of scalar libm, giving 2-4x on AVX2 hardware.
20
+ switch (op) {
21
+ case NUMBL_UNARY_EXP:
22
+ #pragma omp simd
23
+ for (size_t i = 0; i < n; i++) out[i] = exp(a[i]);
24
+ return NUMBL_OK;
25
+ case NUMBL_UNARY_LOG:
26
+ #pragma omp simd
27
+ for (size_t i = 0; i < n; i++) out[i] = log(a[i]);
28
+ return NUMBL_OK;
29
+ case NUMBL_UNARY_LOG2:
30
+ #pragma omp simd
31
+ for (size_t i = 0; i < n; i++) out[i] = log2(a[i]);
32
+ return NUMBL_OK;
33
+ case NUMBL_UNARY_LOG10:
34
+ #pragma omp simd
35
+ for (size_t i = 0; i < n; i++) out[i] = log10(a[i]);
36
+ return NUMBL_OK;
37
+ case NUMBL_UNARY_SQRT:
38
+ #pragma omp simd
39
+ for (size_t i = 0; i < n; i++) out[i] = sqrt(a[i]);
40
+ return NUMBL_OK;
41
+ case NUMBL_UNARY_ABS:
42
+ #pragma omp simd
43
+ for (size_t i = 0; i < n; i++) out[i] = fabs(a[i]);
44
+ return NUMBL_OK;
45
+ case NUMBL_UNARY_FLOOR:
46
+ #pragma omp simd
47
+ for (size_t i = 0; i < n; i++) out[i] = floor(a[i]);
48
+ return NUMBL_OK;
49
+ case NUMBL_UNARY_CEIL:
50
+ #pragma omp simd
51
+ for (size_t i = 0; i < n; i++) out[i] = ceil(a[i]);
52
+ return NUMBL_OK;
53
+ case NUMBL_UNARY_ROUND:
54
+ #pragma omp simd
55
+ for (size_t i = 0; i < n; i++) out[i] = round(a[i]);
56
+ return NUMBL_OK;
57
+ case NUMBL_UNARY_TRUNC:
58
+ #pragma omp simd
59
+ for (size_t i = 0; i < n; i++) out[i] = trunc(a[i]);
60
+ return NUMBL_OK;
61
+ case NUMBL_UNARY_SIN:
62
+ #pragma omp simd
63
+ for (size_t i = 0; i < n; i++) out[i] = sin(a[i]);
64
+ return NUMBL_OK;
65
+ case NUMBL_UNARY_COS:
66
+ #pragma omp simd
67
+ for (size_t i = 0; i < n; i++) out[i] = cos(a[i]);
68
+ return NUMBL_OK;
69
+ case NUMBL_UNARY_TAN:
70
+ #pragma omp simd
71
+ for (size_t i = 0; i < n; i++) out[i] = tan(a[i]);
72
+ return NUMBL_OK;
73
+ case NUMBL_UNARY_ASIN:
74
+ #pragma omp simd
75
+ for (size_t i = 0; i < n; i++) out[i] = asin(a[i]);
76
+ return NUMBL_OK;
77
+ case NUMBL_UNARY_ACOS:
78
+ #pragma omp simd
79
+ for (size_t i = 0; i < n; i++) out[i] = acos(a[i]);
80
+ return NUMBL_OK;
81
+ case NUMBL_UNARY_ATAN:
82
+ #pragma omp simd
83
+ for (size_t i = 0; i < n; i++) out[i] = atan(a[i]);
84
+ return NUMBL_OK;
85
+ case NUMBL_UNARY_SINH:
86
+ #pragma omp simd
87
+ for (size_t i = 0; i < n; i++) out[i] = sinh(a[i]);
88
+ return NUMBL_OK;
89
+ case NUMBL_UNARY_COSH:
90
+ #pragma omp simd
91
+ for (size_t i = 0; i < n; i++) out[i] = cosh(a[i]);
92
+ return NUMBL_OK;
93
+ case NUMBL_UNARY_TANH:
94
+ #pragma omp simd
95
+ for (size_t i = 0; i < n; i++) out[i] = tanh(a[i]);
96
+ return NUMBL_OK;
97
+ case NUMBL_UNARY_SIGN:
98
+ #pragma omp simd
99
+ for (size_t i = 0; i < n; i++) out[i] = rsign(a[i]);
100
+ return NUMBL_OK;
101
+ default:
102
+ return NUMBL_ERR_BAD_OP;
103
+ }
104
+ }
@@ -0,0 +1,162 @@
1
+ /**
2
+ * Flat reductions: reduce an entire buffer to a single value.
3
+ * Caller-allocated input; 1-double output.
4
+ */
5
+
6
+ #include "numbl_ops.h"
7
+
8
+ #include <math.h>
9
+ #include <stdint.h>
10
+ #include <string.h>
11
+
12
+ /* Bit-level NaN detection + constant. `-ffast-math` implies
13
+ * `-ffinite-math-only`, which makes gcc fold `isnan(x)` / `x != x` to
14
+ * `false` and collapse `0.0 / 0.0` to 0. Max/min need MATLAB-compatible
15
+ * NaN-omit semantics, so we inspect the IEEE 754 bit pattern directly
16
+ * and fabricate NaN by bit pattern too. */
17
/* Returns 1 when x is a NaN, 0 otherwise, by inspecting the raw IEEE 754
 * bits instead of using a floating-point comparison. With the sign bit
 * cleared, every NaN encoding compares strictly greater than the encoding
 * of +Inf (all-ones exponent, zero mantissa). */
static inline int nb_isnan(double x) {
    const uint64_t sign_mask = 0x8000000000000000ULL;
    const uint64_t inf_bits = 0x7FF0000000000000ULL;
    uint64_t raw;
    memcpy(&raw, &x, sizeof raw);
    return (raw & ~sign_mask) > inf_bits;
}
22
+
23
/* Builds a quiet NaN directly from its bit pattern (0x7FF8000000000000)
 * rather than from an arithmetic expression such as 0.0 / 0.0. */
static inline double nb_nan(void) {
    const uint64_t quiet_nan_bits = 0x7FF8000000000000ULL;
    double value;
    memcpy(&value, &quiet_nan_bits, sizeof value);
    return value;
}
29
+
30
+ int numbl_real_flat_reduce(int op, size_t n, const double* a, double* out) {
31
+ if ((!a && n > 0) || !out) return NUMBL_ERR_NULL_PTR;
32
+
33
+ switch (op) {
34
+ case NUMBL_REDUCE_SUM: {
35
+ double s = 0.0;
36
+ for (size_t i = 0; i < n; i++) s += a[i];
37
+ *out = s;
38
+ return NUMBL_OK;
39
+ }
40
+ case NUMBL_REDUCE_PROD: {
41
+ double p = 1.0;
42
+ for (size_t i = 0; i < n; i++) p *= a[i];
43
+ *out = p;
44
+ return NUMBL_OK;
45
+ }
46
+ case NUMBL_REDUCE_MAX: {
47
+ /* MATLAB default: omitnan. If ALL values are NaN, result is NaN. */
48
+ double m = -INFINITY;
49
+ int any = 0;
50
+ for (size_t i = 0; i < n; i++) {
51
+ double v = a[i];
52
+ if (nb_isnan(v)) continue;
53
+ if (v > m) m = v;
54
+ any = 1;
55
+ }
56
+ *out = any ? m : nb_nan();
57
+ return NUMBL_OK;
58
+ }
59
+ case NUMBL_REDUCE_MIN: {
60
+ double m = INFINITY;
61
+ int any = 0;
62
+ for (size_t i = 0; i < n; i++) {
63
+ double v = a[i];
64
+ if (nb_isnan(v)) continue;
65
+ if (v < m) m = v;
66
+ any = 1;
67
+ }
68
+ *out = any ? m : nb_nan();
69
+ return NUMBL_OK;
70
+ }
71
+ case NUMBL_REDUCE_ANY: {
72
+ double r = 0.0;
73
+ for (size_t i = 0; i < n; i++) {
74
+ if (a[i] != 0.0 || nb_isnan(a[i])) { r = 1.0; break; }
75
+ }
76
+ *out = r;
77
+ return NUMBL_OK;
78
+ }
79
+ case NUMBL_REDUCE_ALL: {
80
+ double r = 1.0;
81
+ for (size_t i = 0; i < n; i++) {
82
+ if (a[i] == 0.0) { r = 0.0; break; }
83
+ }
84
+ *out = r;
85
+ return NUMBL_OK;
86
+ }
87
+ case NUMBL_REDUCE_MEAN: {
88
+ if (n == 0) { *out = nb_nan(); return NUMBL_OK; }
89
+ double s = 0.0;
90
+ for (size_t i = 0; i < n; i++) s += a[i];
91
+ *out = s / (double)n;
92
+ return NUMBL_OK;
93
+ }
94
+ default:
95
+ return NUMBL_ERR_BAD_OP;
96
+ }
97
+ }
98
+
99
+ int numbl_complex_flat_reduce(int op, size_t n,
100
+ const double* a_re, const double* a_im,
101
+ double* out_re, double* out_im) {
102
+ if ((!a_re && n > 0) || !out_re) return NUMBL_ERR_NULL_PTR;
103
+
104
+ switch (op) {
105
+ case NUMBL_REDUCE_SUM: {
106
+ if (!out_im) return NUMBL_ERR_NULL_PTR;
107
+ double sr = 0.0, si = 0.0;
108
+ for (size_t i = 0; i < n; i++) {
109
+ sr += a_re[i];
110
+ if (a_im) si += a_im[i];
111
+ }
112
+ *out_re = sr;
113
+ *out_im = si;
114
+ return NUMBL_OK;
115
+ }
116
+ case NUMBL_REDUCE_PROD: {
117
+ if (!out_im) return NUMBL_ERR_NULL_PTR;
118
+ /* Accumulate Gauss multiplication: (accRe + accIm i)(ar + ai i). */
119
+ double ar_acc = 1.0, ai_acc = 0.0;
120
+ for (size_t i = 0; i < n; i++) {
121
+ double ar = a_re[i];
122
+ double ai = a_im ? a_im[i] : 0.0;
123
+ double nr = ar_acc * ar - ai_acc * ai;
124
+ double ni = ar_acc * ai + ai_acc * ar;
125
+ ar_acc = nr;
126
+ ai_acc = ni;
127
+ }
128
+ *out_re = ar_acc;
129
+ *out_im = ai_acc;
130
+ return NUMBL_OK;
131
+ }
132
+ case NUMBL_REDUCE_ANY: {
133
+ double r = 0.0;
134
+ for (size_t i = 0; i < n; i++) {
135
+ double ar = a_re[i];
136
+ double ai = a_im ? a_im[i] : 0.0;
137
+ if (ar != 0.0 || ai != 0.0 || nb_isnan(ar) || nb_isnan(ai)) {
138
+ r = 1.0;
139
+ break;
140
+ }
141
+ }
142
+ *out_re = r;
143
+ return NUMBL_OK;
144
+ }
145
+ case NUMBL_REDUCE_ALL: {
146
+ double r = 1.0;
147
+ for (size_t i = 0; i < n; i++) {
148
+ double ar = a_re[i];
149
+ double ai = a_im ? a_im[i] : 0.0;
150
+ if (ar == 0.0 && ai == 0.0) {
151
+ r = 0.0;
152
+ break;
153
+ }
154
+ }
155
+ *out_re = r;
156
+ return NUMBL_OK;
157
+ }
158
+ default:
159
+ /* MAX/MIN/MEAN on complex: ambiguous; caller handles. */
160
+ return NUMBL_ERR_BAD_OP;
161
+ }
162
+ }