numkong 7.0.0 → 7.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. package/README.md +197 -124
  2. package/binding.gyp +34 -484
  3. package/c/dispatch_bf16.c +59 -1
  4. package/c/dispatch_e2m3.c +41 -8
  5. package/c/dispatch_e3m2.c +49 -8
  6. package/c/dispatch_e4m3.c +51 -9
  7. package/c/dispatch_e5m2.c +45 -1
  8. package/c/dispatch_f16.c +79 -26
  9. package/c/dispatch_f16c.c +5 -5
  10. package/c/dispatch_f32.c +56 -0
  11. package/c/dispatch_f64.c +52 -0
  12. package/c/dispatch_i4.c +3 -0
  13. package/c/dispatch_i8.c +62 -3
  14. package/c/dispatch_other.c +18 -0
  15. package/c/dispatch_u1.c +54 -9
  16. package/c/dispatch_u4.c +3 -0
  17. package/c/dispatch_u8.c +64 -3
  18. package/c/numkong.c +3 -0
  19. package/include/README.md +79 -9
  20. package/include/numkong/attention/sapphireamx.h +278 -276
  21. package/include/numkong/attention/sme.h +983 -977
  22. package/include/numkong/attention.h +1 -1
  23. package/include/numkong/capabilities.h +289 -94
  24. package/include/numkong/cast/README.md +40 -40
  25. package/include/numkong/cast/diamond.h +64 -0
  26. package/include/numkong/cast/haswell.h +42 -194
  27. package/include/numkong/cast/icelake.h +42 -37
  28. package/include/numkong/cast/loongsonasx.h +252 -0
  29. package/include/numkong/cast/neon.h +216 -249
  30. package/include/numkong/cast/powervsx.h +449 -0
  31. package/include/numkong/cast/rvv.h +223 -274
  32. package/include/numkong/cast/sapphire.h +18 -18
  33. package/include/numkong/cast/serial.h +1018 -944
  34. package/include/numkong/cast/skylake.h +82 -23
  35. package/include/numkong/cast/v128relaxed.h +462 -105
  36. package/include/numkong/cast.h +24 -0
  37. package/include/numkong/cast.hpp +44 -0
  38. package/include/numkong/curved/README.md +17 -17
  39. package/include/numkong/curved/neon.h +131 -7
  40. package/include/numkong/curved/neonbfdot.h +6 -7
  41. package/include/numkong/curved/rvv.h +26 -26
  42. package/include/numkong/curved/smef64.h +186 -182
  43. package/include/numkong/curved.h +14 -18
  44. package/include/numkong/dot/README.md +154 -137
  45. package/include/numkong/dot/alder.h +43 -43
  46. package/include/numkong/dot/diamond.h +158 -0
  47. package/include/numkong/dot/genoa.h +4 -30
  48. package/include/numkong/dot/haswell.h +215 -180
  49. package/include/numkong/dot/icelake.h +190 -76
  50. package/include/numkong/dot/loongsonasx.h +671 -0
  51. package/include/numkong/dot/neon.h +124 -73
  52. package/include/numkong/dot/neonbfdot.h +11 -12
  53. package/include/numkong/dot/neonfhm.h +44 -46
  54. package/include/numkong/dot/neonfp8.h +323 -0
  55. package/include/numkong/dot/neonsdot.h +190 -76
  56. package/include/numkong/dot/powervsx.h +752 -0
  57. package/include/numkong/dot/rvv.h +92 -84
  58. package/include/numkong/dot/rvvbf16.h +12 -12
  59. package/include/numkong/dot/rvvhalf.h +12 -12
  60. package/include/numkong/dot/sapphire.h +4 -4
  61. package/include/numkong/dot/serial.h +66 -30
  62. package/include/numkong/dot/sierra.h +31 -31
  63. package/include/numkong/dot/skylake.h +142 -110
  64. package/include/numkong/dot/sve.h +217 -177
  65. package/include/numkong/dot/svebfdot.h +10 -10
  66. package/include/numkong/dot/svehalf.h +85 -41
  67. package/include/numkong/dot/svesdot.h +89 -0
  68. package/include/numkong/dot/v128relaxed.h +124 -89
  69. package/include/numkong/dot.h +114 -48
  70. package/include/numkong/dots/README.md +203 -203
  71. package/include/numkong/dots/alder.h +12 -9
  72. package/include/numkong/dots/diamond.h +86 -0
  73. package/include/numkong/dots/genoa.h +10 -4
  74. package/include/numkong/dots/haswell.h +63 -48
  75. package/include/numkong/dots/icelake.h +27 -18
  76. package/include/numkong/dots/loongsonasx.h +176 -0
  77. package/include/numkong/dots/neon.h +14 -11
  78. package/include/numkong/dots/neonbfdot.h +4 -3
  79. package/include/numkong/dots/neonfhm.h +11 -9
  80. package/include/numkong/dots/neonfp8.h +99 -0
  81. package/include/numkong/dots/neonsdot.h +48 -12
  82. package/include/numkong/dots/powervsx.h +194 -0
  83. package/include/numkong/dots/rvv.h +451 -344
  84. package/include/numkong/dots/sapphireamx.h +1028 -984
  85. package/include/numkong/dots/serial.h +213 -197
  86. package/include/numkong/dots/sierra.h +10 -7
  87. package/include/numkong/dots/skylake.h +47 -36
  88. package/include/numkong/dots/sme.h +2001 -2364
  89. package/include/numkong/dots/smebi32.h +175 -162
  90. package/include/numkong/dots/smef64.h +328 -323
  91. package/include/numkong/dots/v128relaxed.h +64 -41
  92. package/include/numkong/dots.h +573 -293
  93. package/include/numkong/dots.hpp +45 -43
  94. package/include/numkong/each/README.md +133 -137
  95. package/include/numkong/each/haswell.h +6 -6
  96. package/include/numkong/each/icelake.h +7 -7
  97. package/include/numkong/each/neon.h +76 -42
  98. package/include/numkong/each/neonbfdot.h +11 -12
  99. package/include/numkong/each/neonhalf.h +24 -116
  100. package/include/numkong/each/rvv.h +28 -28
  101. package/include/numkong/each/sapphire.h +27 -161
  102. package/include/numkong/each/serial.h +6 -6
  103. package/include/numkong/each/skylake.h +7 -7
  104. package/include/numkong/each/v128relaxed.h +562 -0
  105. package/include/numkong/each.h +148 -62
  106. package/include/numkong/each.hpp +2 -2
  107. package/include/numkong/geospatial/README.md +18 -18
  108. package/include/numkong/geospatial/haswell.h +365 -325
  109. package/include/numkong/geospatial/neon.h +350 -306
  110. package/include/numkong/geospatial/rvv.h +4 -4
  111. package/include/numkong/geospatial/skylake.h +376 -340
  112. package/include/numkong/geospatial/v128relaxed.h +366 -327
  113. package/include/numkong/geospatial.h +17 -17
  114. package/include/numkong/matrix.hpp +4 -4
  115. package/include/numkong/maxsim/README.md +14 -14
  116. package/include/numkong/maxsim/alder.h +6 -6
  117. package/include/numkong/maxsim/genoa.h +4 -4
  118. package/include/numkong/maxsim/haswell.h +6 -6
  119. package/include/numkong/maxsim/icelake.h +18 -18
  120. package/include/numkong/maxsim/neonsdot.h +21 -21
  121. package/include/numkong/maxsim/sapphireamx.h +14 -14
  122. package/include/numkong/maxsim/serial.h +6 -6
  123. package/include/numkong/maxsim/sme.h +221 -196
  124. package/include/numkong/maxsim/v128relaxed.h +6 -6
  125. package/include/numkong/mesh/README.md +62 -56
  126. package/include/numkong/mesh/haswell.h +339 -464
  127. package/include/numkong/mesh/neon.h +1100 -519
  128. package/include/numkong/mesh/neonbfdot.h +36 -68
  129. package/include/numkong/mesh/rvv.h +530 -435
  130. package/include/numkong/mesh/serial.h +75 -91
  131. package/include/numkong/mesh/skylake.h +1627 -302
  132. package/include/numkong/mesh/v128relaxed.h +443 -330
  133. package/include/numkong/mesh.h +63 -49
  134. package/include/numkong/mesh.hpp +4 -4
  135. package/include/numkong/numkong.h +3 -3
  136. package/include/numkong/numkong.hpp +1 -0
  137. package/include/numkong/probability/README.md +23 -19
  138. package/include/numkong/probability/neon.h +82 -52
  139. package/include/numkong/probability/rvv.h +28 -23
  140. package/include/numkong/probability/serial.h +51 -39
  141. package/include/numkong/probability.h +20 -23
  142. package/include/numkong/random.h +1 -1
  143. package/include/numkong/reduce/README.md +143 -138
  144. package/include/numkong/reduce/alder.h +81 -77
  145. package/include/numkong/reduce/haswell.h +222 -220
  146. package/include/numkong/reduce/neon.h +629 -519
  147. package/include/numkong/reduce/neonbfdot.h +7 -218
  148. package/include/numkong/reduce/neonfhm.h +9 -381
  149. package/include/numkong/reduce/neonsdot.h +9 -9
  150. package/include/numkong/reduce/rvv.h +928 -802
  151. package/include/numkong/reduce/serial.h +23 -27
  152. package/include/numkong/reduce/sierra.h +20 -20
  153. package/include/numkong/reduce/skylake.h +326 -324
  154. package/include/numkong/reduce/v128relaxed.h +52 -52
  155. package/include/numkong/reduce.h +4 -23
  156. package/include/numkong/reduce.hpp +156 -11
  157. package/include/numkong/scalar/README.md +6 -6
  158. package/include/numkong/scalar/haswell.h +26 -17
  159. package/include/numkong/scalar/loongsonasx.h +74 -0
  160. package/include/numkong/scalar/neon.h +9 -9
  161. package/include/numkong/scalar/powervsx.h +96 -0
  162. package/include/numkong/scalar/rvv.h +2 -2
  163. package/include/numkong/scalar/sapphire.h +21 -10
  164. package/include/numkong/scalar/serial.h +21 -21
  165. package/include/numkong/scalar.h +13 -0
  166. package/include/numkong/set/README.md +28 -28
  167. package/include/numkong/set/haswell.h +12 -12
  168. package/include/numkong/set/icelake.h +14 -14
  169. package/include/numkong/set/loongsonasx.h +181 -0
  170. package/include/numkong/set/neon.h +17 -18
  171. package/include/numkong/set/powervsx.h +326 -0
  172. package/include/numkong/set/rvv.h +4 -4
  173. package/include/numkong/set/serial.h +6 -6
  174. package/include/numkong/set/sve.h +60 -59
  175. package/include/numkong/set/v128relaxed.h +6 -6
  176. package/include/numkong/set.h +21 -7
  177. package/include/numkong/sets/README.md +26 -26
  178. package/include/numkong/sets/loongsonasx.h +52 -0
  179. package/include/numkong/sets/powervsx.h +65 -0
  180. package/include/numkong/sets/smebi32.h +395 -364
  181. package/include/numkong/sets.h +83 -40
  182. package/include/numkong/sparse/README.md +4 -4
  183. package/include/numkong/sparse/icelake.h +101 -101
  184. package/include/numkong/sparse/serial.h +1 -1
  185. package/include/numkong/sparse/sve2.h +137 -141
  186. package/include/numkong/sparse/turin.h +12 -12
  187. package/include/numkong/sparse.h +10 -10
  188. package/include/numkong/spatial/README.md +230 -226
  189. package/include/numkong/spatial/alder.h +113 -116
  190. package/include/numkong/spatial/diamond.h +240 -0
  191. package/include/numkong/spatial/genoa.h +0 -68
  192. package/include/numkong/spatial/haswell.h +74 -55
  193. package/include/numkong/spatial/icelake.h +539 -58
  194. package/include/numkong/spatial/loongsonasx.h +483 -0
  195. package/include/numkong/spatial/neon.h +125 -52
  196. package/include/numkong/spatial/neonbfdot.h +8 -9
  197. package/include/numkong/spatial/neonfp8.h +258 -0
  198. package/include/numkong/spatial/neonsdot.h +180 -12
  199. package/include/numkong/spatial/powervsx.h +738 -0
  200. package/include/numkong/spatial/rvv.h +146 -139
  201. package/include/numkong/spatial/rvvbf16.h +17 -12
  202. package/include/numkong/spatial/rvvhalf.h +13 -10
  203. package/include/numkong/spatial/serial.h +13 -12
  204. package/include/numkong/spatial/sierra.h +232 -39
  205. package/include/numkong/spatial/skylake.h +73 -74
  206. package/include/numkong/spatial/sve.h +93 -72
  207. package/include/numkong/spatial/svebfdot.h +29 -29
  208. package/include/numkong/spatial/svehalf.h +52 -26
  209. package/include/numkong/spatial/svesdot.h +142 -0
  210. package/include/numkong/spatial/v128relaxed.h +293 -41
  211. package/include/numkong/spatial.h +338 -82
  212. package/include/numkong/spatials/README.md +194 -194
  213. package/include/numkong/spatials/diamond.h +82 -0
  214. package/include/numkong/spatials/haswell.h +2 -2
  215. package/include/numkong/spatials/loongsonasx.h +153 -0
  216. package/include/numkong/spatials/neonfp8.h +111 -0
  217. package/include/numkong/spatials/neonsdot.h +34 -0
  218. package/include/numkong/spatials/powervsx.h +153 -0
  219. package/include/numkong/spatials/rvv.h +259 -243
  220. package/include/numkong/spatials/sapphireamx.h +173 -173
  221. package/include/numkong/spatials/serial.h +2 -2
  222. package/include/numkong/spatials/skylake.h +2 -2
  223. package/include/numkong/spatials/sme.h +590 -605
  224. package/include/numkong/spatials/smef64.h +139 -130
  225. package/include/numkong/spatials/v128relaxed.h +2 -2
  226. package/include/numkong/spatials.h +820 -500
  227. package/include/numkong/spatials.hpp +49 -48
  228. package/include/numkong/tensor.hpp +406 -17
  229. package/include/numkong/trigonometry/README.md +19 -19
  230. package/include/numkong/trigonometry/haswell.h +402 -401
  231. package/include/numkong/trigonometry/neon.h +386 -387
  232. package/include/numkong/trigonometry/rvv.h +52 -51
  233. package/include/numkong/trigonometry/serial.h +13 -13
  234. package/include/numkong/trigonometry/skylake.h +373 -369
  235. package/include/numkong/trigonometry/v128relaxed.h +375 -374
  236. package/include/numkong/trigonometry.h +13 -13
  237. package/include/numkong/trigonometry.hpp +2 -2
  238. package/include/numkong/types.h +287 -49
  239. package/include/numkong/types.hpp +436 -12
  240. package/include/numkong/vector.hpp +82 -14
  241. package/javascript/dist/cjs/numkong-wasm.js +6 -12
  242. package/javascript/dist/cjs/numkong.d.ts +7 -1
  243. package/javascript/dist/cjs/numkong.js +37 -11
  244. package/javascript/dist/cjs/types.d.ts +9 -0
  245. package/javascript/dist/cjs/types.js +96 -0
  246. package/javascript/dist/esm/numkong-browser.d.ts +14 -0
  247. package/javascript/dist/esm/numkong-browser.js +23 -0
  248. package/javascript/dist/esm/numkong-wasm.js +6 -12
  249. package/javascript/dist/esm/numkong.d.ts +7 -1
  250. package/javascript/dist/esm/numkong.js +37 -11
  251. package/javascript/dist/esm/types.d.ts +9 -0
  252. package/javascript/dist/esm/types.js +96 -0
  253. package/javascript/node-gyp-build.d.ts +4 -1
  254. package/javascript/numkong-browser.ts +40 -0
  255. package/javascript/numkong-wasm.ts +7 -13
  256. package/javascript/numkong.c +5 -26
  257. package/javascript/numkong.ts +36 -11
  258. package/javascript/tsconfig-base.json +1 -0
  259. package/javascript/tsconfig-cjs.json +6 -1
  260. package/javascript/types.ts +110 -0
  261. package/numkong.gypi +101 -0
  262. package/package.json +34 -13
  263. package/probes/arm_neon.c +8 -0
  264. package/probes/arm_neon_bfdot.c +9 -0
  265. package/probes/arm_neon_fhm.c +9 -0
  266. package/probes/arm_neon_half.c +8 -0
  267. package/probes/arm_neon_sdot.c +9 -0
  268. package/probes/arm_neonfp8.c +9 -0
  269. package/probes/arm_sme.c +16 -0
  270. package/probes/arm_sme2.c +16 -0
  271. package/probes/arm_sme2p1.c +16 -0
  272. package/probes/arm_sme_bf16.c +16 -0
  273. package/probes/arm_sme_bi32.c +16 -0
  274. package/probes/arm_sme_f64.c +16 -0
  275. package/probes/arm_sme_fa64.c +14 -0
  276. package/probes/arm_sme_half.c +16 -0
  277. package/probes/arm_sme_lut2.c +15 -0
  278. package/probes/arm_sve.c +18 -0
  279. package/probes/arm_sve2.c +20 -0
  280. package/probes/arm_sve2p1.c +18 -0
  281. package/probes/arm_sve_bfdot.c +20 -0
  282. package/probes/arm_sve_half.c +18 -0
  283. package/probes/arm_sve_sdot.c +21 -0
  284. package/probes/loongarch_lasx.c +12 -0
  285. package/probes/power_vsx.c +12 -0
  286. package/probes/probe.js +127 -0
  287. package/probes/riscv_rvv.c +14 -0
  288. package/probes/riscv_rvv_bb.c +15 -0
  289. package/probes/riscv_rvv_bf16.c +17 -0
  290. package/probes/riscv_rvv_half.c +14 -0
  291. package/probes/wasm_v128relaxed.c +11 -0
  292. package/probes/x86_alder.c +17 -0
  293. package/probes/x86_diamond.c +17 -0
  294. package/probes/x86_genoa.c +17 -0
  295. package/probes/x86_graniteamx.c +19 -0
  296. package/probes/x86_haswell.c +11 -0
  297. package/probes/x86_icelake.c +17 -0
  298. package/probes/x86_sapphire.c +16 -0
  299. package/probes/x86_sapphireamx.c +18 -0
  300. package/probes/x86_sierra.c +17 -0
  301. package/probes/x86_skylake.c +15 -0
  302. package/probes/x86_turin.c +17 -0
  303. package/wasm/numkong-emscripten.js +2 -0
  304. package/wasm/numkong.d.ts +14 -0
  305. package/wasm/numkong.js +1124 -0
  306. package/wasm/numkong.wasm +0 -0
  307. package/include/numkong/curved/neonhalf.h +0 -212
  308. package/include/numkong/dot/neonhalf.h +0 -198
  309. package/include/numkong/dots/neonhalf.h +0 -57
  310. package/include/numkong/mesh/neonhalf.h +0 -616
  311. package/include/numkong/reduce/neonhalf.h +0 -157
  312. package/include/numkong/spatial/neonhalf.h +0 -118
  313. package/include/numkong/spatial/sapphire.h +0 -343
  314. package/include/numkong/spatials/neonhalf.h +0 -58
  315. package/javascript/README.md +0 -246
@@ -126,6 +126,14 @@ export class VectorView extends VectorBase {
126
126
  super(buffer, byteOffset, length, dtype);
127
127
  }
128
128
 
/** @brief Summarize this view as `VectorView(<length>, <dtype name>)`. */
toString(): string {
  const count = this.length;
  const dtypeName = dtypeToString(this.dtype);
  return `VectorView(${count}, ${dtypeName})`;
}
132
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
136
+
129
137
  /** @brief Create a VectorView from any TypedArray, inferring or accepting dtype. */
130
138
  static from(arr: TypedArray, dtype?: DType): VectorView {
131
139
  const d = dtype ?? inferDtype(arr);
@@ -159,6 +167,14 @@ export class Vector extends VectorBase {
159
167
  }
160
168
  }
161
169
 
/** @brief Summarize this vector as `Vector(<length>, <dtype name>)`. */
toString(): string {
  const count = this.length;
  const dtypeName = dtypeToString(this.dtype);
  return `Vector(${count}, ${dtypeName})`;
}
173
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
177
+
162
178
  /** @brief Create an owning Vector by copying data from a TypedArray. */
163
179
  static fromTypedArray(arr: TypedArray, dtype?: DType): Vector {
164
180
  const d = dtype ?? inferDtype(arr);
@@ -254,6 +270,14 @@ export class Matrix extends MatrixBase {
254
270
  }
255
271
  }
256
272
 
/** @brief Summarize this matrix as `Matrix(<rows>×<cols>, <dtype name>)`. */
toString(): string {
  const shape = `${this.rows}\u00d7${this.cols}`;
  const dtypeName = dtypeToString(this.dtype);
  return `Matrix(${shape}, ${dtypeName})`;
}
276
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
280
+
257
281
  static fromTypedArray(array: TypedArray, rows: number, cols: number, dtype?: DType): Matrix {
258
282
  const d = dtype ?? inferDtype(array);
259
283
  const buf = (array.buffer as ArrayBuffer).slice(array.byteOffset, array.byteOffset + array.byteLength);
@@ -302,6 +326,14 @@ export class PackedMatrix {
302
326
 
303
327
  dispose(): void { this._disposed = true; }
304
328
  get disposed(): boolean { return this._disposed; }
329
+
/** @brief Summarize as `PackedMatrix(<width>×<depth>, <dtype name>, <byteLength> bytes)`. */
toString(): string {
  const shape = `${this.width}\u00d7${this.depth}`;
  const dtypeName = dtypeToString(this.dtype);
  return `PackedMatrix(${shape}, ${dtypeName}, ${this.byteLength} bytes)`;
}
333
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
305
337
  }
306
338
 
307
339
  /** @brief Kernel family identifiers for output dtype resolution. */
@@ -389,6 +421,22 @@ export class Float16Array extends Uint16Array {
389
421
  }
390
422
  this[index] = conversionFunctions.castF32ToF16(value);
391
423
  }
424
+
/**
 * @brief Render at most the first 20 elements as `<converted value> [0x<raw 4-digit hex>]`.
 *
 * When `conversionFunctions` is not set, only `Float16Array(<length>)` is returned.
 * A trailing `, ...` marks arrays longer than 20 elements.
 */
toString(): string {
  const converters = conversionFunctions;
  if (!converters) {
    return `Float16Array(${this.length})`;
  }
  const shown = Math.min(this.length, 20);
  const rendered: string[] = [];
  let index = 0;
  while (index < shown) {
    const raw = this[index];
    const value = converters.castF16ToF32(raw);
    const hex = raw.toString(16).padStart(4, '0');
    rendered.push(`${value} [0x${hex}]`);
    index += 1;
  }
  let ellipsis = '';
  if (this.length > 20) {
    ellipsis = ', ...';
  }
  return `Float16Array(${this.length}) [${rendered.join(', ')}${ellipsis}]`;
}
436
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
392
440
  }
393
441
 
394
442
  /**
@@ -443,6 +491,22 @@ export class BFloat16Array extends Uint16Array {
443
491
  }
444
492
  this[index] = conversionFunctions.castF32ToBF16(value);
445
493
  }
494
+
/**
 * @brief Render at most the first 20 elements as `<converted value> [0x<raw 4-digit hex>]`.
 *
 * When `conversionFunctions` is not set, only `BFloat16Array(<length>)` is returned.
 * A trailing `, ...` marks arrays longer than 20 elements.
 */
toString(): string {
  const converters = conversionFunctions;
  if (!converters) {
    return `BFloat16Array(${this.length})`;
  }
  const shown = Math.min(this.length, 20);
  const rendered: string[] = [];
  let index = 0;
  while (index < shown) {
    const raw = this[index];
    const value = converters.castBF16ToF32(raw);
    const hex = raw.toString(16).padStart(4, '0');
    rendered.push(`${value} [0x${hex}]`);
    index += 1;
  }
  let ellipsis = '';
  if (this.length > 20) {
    ellipsis = ', ...';
  }
  return `BFloat16Array(${this.length}) [${rendered.join(', ')}${ellipsis}]`;
}
506
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
446
510
  }
447
511
 
448
512
  /**
@@ -496,6 +560,22 @@ export class E4M3Array extends Uint8Array {
496
560
  }
497
561
  this[index] = conversionFunctions.castF32ToE4M3(value);
498
562
  }
563
+
/**
 * @brief Render at most the first 20 elements as `<converted value> [0x<raw 2-digit hex>]`.
 *
 * When `conversionFunctions` is not set, only `E4M3Array(<length>)` is returned.
 * A trailing `, ...` marks arrays longer than 20 elements.
 */
toString(): string {
  const converters = conversionFunctions;
  if (!converters) {
    return `E4M3Array(${this.length})`;
  }
  const shown = Math.min(this.length, 20);
  const rendered: string[] = [];
  let index = 0;
  while (index < shown) {
    const raw = this[index];
    const value = converters.castE4M3ToF32(raw);
    const hex = raw.toString(16).padStart(2, '0');
    rendered.push(`${value} [0x${hex}]`);
    index += 1;
  }
  let ellipsis = '';
  if (this.length > 20) {
    ellipsis = ', ...';
  }
  return `E4M3Array(${this.length}) [${rendered.join(', ')}${ellipsis}]`;
}
575
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
499
579
  }
500
580
 
501
581
  /**
@@ -549,6 +629,22 @@ export class E5M2Array extends Uint8Array {
549
629
  }
550
630
  this[index] = conversionFunctions.castF32ToE5M2(value);
551
631
  }
632
+
/**
 * @brief Render at most the first 20 elements as `<converted value> [0x<raw 2-digit hex>]`.
 *
 * When `conversionFunctions` is not set, only `E5M2Array(<length>)` is returned.
 * A trailing `, ...` marks arrays longer than 20 elements.
 */
toString(): string {
  const converters = conversionFunctions;
  if (!converters) {
    return `E5M2Array(${this.length})`;
  }
  const shown = Math.min(this.length, 20);
  const rendered: string[] = [];
  let index = 0;
  while (index < shown) {
    const raw = this[index];
    const value = converters.castE5M2ToF32(raw);
    const hex = raw.toString(16).padStart(2, '0');
    rendered.push(`${value} [0x${hex}]`);
    index += 1;
  }
  let ellipsis = '';
  if (this.length > 20) {
    ellipsis = ', ...';
  }
  return `E5M2Array(${this.length}) [${rendered.join(', ')}${ellipsis}]`;
}
644
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
552
648
  }
553
649
 
554
650
  /**
@@ -636,6 +732,20 @@ export class BinaryArray extends Uint8Array {
636
732
  }
637
733
  return binary;
638
734
  }
735
+
/**
 * @brief Render at most the first 20 stored bytes as zero-padded `0b` binary literals.
 *
 * The header shows `_bitLength`; a trailing `, ...` marks arrays with more than 20 bytes.
 */
toString(): string {
  const shown = Math.min(this.length, 20);
  const rendered: string[] = [];
  let index = 0;
  while (index < shown) {
    const bits = this[index].toString(2).padStart(8, '0');
    rendered.push(`0b${bits}`);
    index += 1;
  }
  let ellipsis = '';
  if (this.length > 20) {
    ellipsis = ', ...';
  }
  return `BinaryArray(${this._bitLength}) [${rendered.join(', ')}${ellipsis}]`;
}
745
+
/** @brief Node.js custom-inspect hook; yields the same text as `toString()`. */
[Symbol.for('nodejs.util.inspect.custom')](): string {
  const rendered: string = this.toString();
  return rendered;
}
639
749
  }
640
750
 
641
751
  /**
package/numkong.gypi ADDED
@@ -0,0 +1,101 @@
1
+ # NumKong GYP include for downstream native addons.
2
+ #
3
+ # Usage in your binding.gyp:
4
+ #
5
+ # {
6
+ # "includes": ["<!(node -p \"require.resolve('numkong/numkong.gypi')\")"],
7
+ # "targets": [{
8
+ # "target_name": "my_addon",
9
+ # "dependencies": ["numkong_lib"],
10
+ # "sources": ["my_addon.c"],
11
+ # }]
12
+ # }
13
+ #
14
+ {
15
+ "variables": {
16
+ "numkong_root%": "<!(node -e \"try{console.log(require('path').dirname(require.resolve('numkong/package.json')))}catch{console.log('numkong')}\")",
17
+ },
18
+ "targets": [
19
+ {
20
+ "target_name": "numkong_lib",
21
+ "type": "static_library",
22
+ "actions": [
23
+ {
24
+ "action_name": "numkong_probe",
25
+ "inputs": ["<(numkong_root)/probes/probe.js"],
26
+ "outputs": ["<!(node -e \"console.log(require('path').resolve('<(numkong_root)','nk_probes.h'))\")"],
27
+ "action": ["node", "<(numkong_root)/probes/probe.js"],
28
+ "message": "Probing ISA capabilities for NumKong",
29
+ },
30
+ ],
31
+ "sources": [
32
+ "<(numkong_root)/c/numkong.c",
33
+ "<(numkong_root)/c/dispatch_f64.c",
34
+ "<(numkong_root)/c/dispatch_f32.c",
35
+ "<(numkong_root)/c/dispatch_f16.c",
36
+ "<(numkong_root)/c/dispatch_bf16.c",
37
+ "<(numkong_root)/c/dispatch_i8.c",
38
+ "<(numkong_root)/c/dispatch_u8.c",
39
+ "<(numkong_root)/c/dispatch_u1.c",
40
+ "<(numkong_root)/c/dispatch_e4m3.c",
41
+ "<(numkong_root)/c/dispatch_e5m2.c",
42
+ "<(numkong_root)/c/dispatch_other.c",
43
+ "<(numkong_root)/c/dispatch_f64c.c",
44
+ "<(numkong_root)/c/dispatch_f32c.c",
45
+ "<(numkong_root)/c/dispatch_f16c.c",
46
+ "<(numkong_root)/c/dispatch_bf16c.c",
47
+ "<(numkong_root)/c/dispatch_i16.c",
48
+ "<(numkong_root)/c/dispatch_i32.c",
49
+ "<(numkong_root)/c/dispatch_i64.c",
50
+ "<(numkong_root)/c/dispatch_u16.c",
51
+ "<(numkong_root)/c/dispatch_u32.c",
52
+ "<(numkong_root)/c/dispatch_u64.c",
53
+ "<(numkong_root)/c/dispatch_i4.c",
54
+ "<(numkong_root)/c/dispatch_u4.c",
55
+ "<(numkong_root)/c/dispatch_e2m3.c",
56
+ "<(numkong_root)/c/dispatch_e3m2.c",
57
+ ],
58
+ "include_dirs": [
59
+ "<(numkong_root)/include",
60
+ ],
61
+ "defines": [
62
+ "NK_NATIVE_F16=0",
63
+ "NK_NATIVE_BF16=0",
64
+ "NK_DYNAMIC_DISPATCH=1",
65
+ ],
66
+ "cflags": [
67
+ "-std=c11",
68
+ "-O3",
69
+ "-Wno-unknown-pragmas",
70
+ "-Wno-maybe-uninitialized",
71
+ "-Wno-cast-function-type",
72
+ "-Wno-switch",
73
+ "-Wno-psabi",
74
+ "-include",
75
+ "<!(node -e \"console.log(require('path').resolve('<(numkong_root)','nk_probes.h'))\")",
76
+ ],
77
+ "msvs_settings": {
78
+ "VCCLCompilerTool": {
79
+ "ForcedIncludeFiles": [
80
+ "<!(node -e \"console.log(require('path').resolve('<(numkong_root)','nk_probes.h'))\")",
81
+ ],
82
+ },
83
+ },
84
+ "conditions": [
85
+ [
86
+ "OS=='mac'",
87
+ {
88
+ "xcode_settings": {
89
+ "MACOSX_DEPLOYMENT_TARGET": "11.0",
90
+ },
91
+ },
92
+ ],
93
+ ],
94
+ "direct_dependent_settings": {
95
+ "include_dirs": [
96
+ "<(numkong_root)/include",
97
+ ],
98
+ },
99
+ },
100
+ ],
101
+ }
package/package.json CHANGED
@@ -1,10 +1,14 @@
1
1
  {
2
2
  "name": "numkong",
3
- "version": "7.0.0",
4
- "description": "Portable mixed-precision BLAS-like vector math library for x86 and ARM",
3
+ "version": "7.4.2",
4
+ "description": "Portable mixed-precision math, linear-algebra, & retrieval library with 2000+ SIMD kernels for x86, Arm, RISC-V, LoongArch, Power, & WebAssembly",
5
5
  "homepage": "https://github.com/ashvardanian/NumKong",
6
6
  "author": "Ash Vardanian",
7
- "license": "Apache 2.0",
7
+ "license": "Apache-2.0",
8
+ "repository": {
9
+ "type": "git",
10
+ "url": "https://github.com/ashvardanian/NumKong"
11
+ },
8
12
  "keywords": [
9
13
  "vector",
10
14
  "distance",
@@ -22,11 +26,10 @@
22
26
  "f16"
23
27
  ],
24
28
  "scripts": {
29
+ "preinstall": "node probes/probe.js",
25
30
  "install": "node-gyp-build",
26
- "prebuild-single": "prebuildify --napi --strip --target=22.0.0",
27
- "prebuild-arm64": "prebuildify --arch arm64 --napi --strip --target=22.0.0",
28
- "prebuild-darwin-x64+arm64": "prebuildify --arch arm64+x64 --napi --strip --target=22.0.0",
29
31
  "build-js": "rm -fr javascript/dist/* && tsc -p javascript/tsconfig-esm.json && tsc -p javascript/tsconfig-cjs.json && cp javascript/dist-package-esm.json javascript/dist/esm/package.json && cp javascript/dist-package-cjs.json javascript/dist/cjs/package.json",
32
+ "build-browser": "esbuild javascript/dist/esm/numkong-browser.js --bundle --format=esm --platform=browser --target=es2022 --outfile=build-wasm/numkong-bundle.js",
30
33
  "test": "node --test ./test/test.mjs",
31
34
  "test:bun": "bun test ./test/test.mjs",
32
35
  "test:deno": "deno test -A --no-check",
@@ -49,7 +52,13 @@
49
52
  ".": {
50
53
  "import": "./javascript/dist/esm/numkong.js",
51
54
  "require": "./javascript/dist/cjs/numkong.js"
52
- }
55
+ },
56
+ "./wasm": {
57
+ "import": "./wasm/numkong.js",
58
+ "types": "./wasm/numkong.d.ts"
59
+ },
60
+ "./numkong.gypi": "./numkong.gypi",
61
+ "./package.json": "./package.json"
53
62
  },
54
63
  "engines": {
55
64
  "node": ">=22.0.0"
@@ -65,23 +74,35 @@
65
74
  "@types/node": "^24.10.0",
66
75
  "node-gyp": "^11.5.0",
67
76
  "playwright": "^1.58.0",
68
- "prebuildify": "^6.0.1",
69
77
  "http-server": "^14.1.1",
70
- "typescript": "^5.9.3"
78
+ "esbuild": "^0.25.0",
79
+ "typescript": "^5.9.3",
80
+ "benchmark": "^2.1.4",
81
+ "mathjs": "^14.9.0",
82
+ "usearch": "^2.21.0"
71
83
  },
72
84
  "files": [
73
85
  "c/",
74
86
  "include/",
75
- "javascript/",
87
+ "javascript/dist/",
88
+ "javascript/*.ts",
89
+ "javascript/*.json",
90
+ "javascript/*.c",
91
+ "probes/",
92
+ "wasm/",
76
93
  "binding.gyp",
94
+ "numkong.gypi",
77
95
  "LICENSE"
78
96
  ],
79
97
  "prettier": {
80
98
  "printWidth": 120
81
99
  },
82
100
  "optionalDependencies": {
83
- "benchmark": "^2.1.4",
84
- "mathjs": "^14.9.0",
85
- "usearch": "^2.21.0"
101
+ "@numkong/darwin-arm64": "7.4.2",
102
+ "@numkong/darwin-x64": "7.4.2",
103
+ "@numkong/linux-arm64": "7.4.2",
104
+ "@numkong/linux-x64": "7.4.2",
105
+ "@numkong/win32-arm64": "7.4.2",
106
+ "@numkong/win32-x64": "7.4.2"
86
107
  }
87
108
  }
@@ -0,0 +1,8 @@
1
+ /* NumKong ISA probe: NEON (AArch64 baseline SIMD) */
2
+ #include <arm_neon.h>
3
+ int main(void) {
4
+ float32x4_t a = vdupq_n_f32(1.0f);
5
+ float32x4_t b = vdupq_n_f32(2.0f);
6
+ float32x4_t c = vaddq_f32(a, b);
7
+ return vgetq_lane_f32(c, 0) > 0.0f ? 0 : 1;
8
+ }
@@ -0,0 +1,9 @@
1
+ /* NumKong ISA probe: NEON BF16 (ARMv8.6-A bfloat16 dot product) */
2
+ #include <arm_neon.h>
3
+ int main(void) {
4
+ bfloat16x8_t a = vdupq_n_bf16(1.0f);
5
+ bfloat16x8_t b = vdupq_n_bf16(2.0f);
6
+ float32x4_t c = vdupq_n_f32(0.0f);
7
+ c = vbfdotq_f32(c, a, b);
8
+ return vgetq_lane_f32(c, 0) > 0.0f ? 0 : 1;
9
+ }
@@ -0,0 +1,9 @@
1
+ /* NumKong ISA probe: NEON FHM (ARMv8.2-A FP16 fused multiply-add) */
2
+ #include <arm_neon.h>
3
+ int main(void) {
4
+ float16x8_t a = vdupq_n_f16(1.0f);
5
+ float16x8_t b = vdupq_n_f16(2.0f);
6
+ float32x4_t c = vdupq_n_f32(0.0f);
7
+ c = vfmlalq_low_f16(c, a, b);
8
+ return vgetq_lane_f32(c, 0) > 0.0f ? 0 : 1;
9
+ }
@@ -0,0 +1,8 @@
1
+ /* NumKong ISA probe: NEON F16 (ARMv8.2-A half-precision) */
2
+ #include <arm_neon.h>
3
+ int main(void) {
4
+ float16x8_t a = vdupq_n_f16(1.0f);
5
+ float16x8_t b = vdupq_n_f16(2.0f);
6
+ float16x8_t c = vaddq_f16(a, b);
7
+ return vgetq_lane_f16(c, 0) > 0.0f ? 0 : 1;
8
+ }
@@ -0,0 +1,9 @@
1
+ /* NumKong ISA probe: NEON SDOT (ARMv8.2-A dot product) */
2
+ #include <arm_neon.h>
3
+ int main(void) {
4
+ int8x16_t a = vdupq_n_s8(1);
5
+ int8x16_t b = vdupq_n_s8(2);
6
+ int32x4_t c = vdupq_n_s32(0);
7
+ c = vdotq_s32(c, a, b);
8
+ return vgetq_lane_s32(c, 0) > 0 ? 0 : 1;
9
+ }
@@ -0,0 +1,9 @@
1
+ /* NumKong ISA probe: NEON FP8 (fp8dot4) */
2
+ #include <arm_neon.h>
3
+ int test_neonfp8(void) {
4
+ mfloat8x16_t a = vreinterpretq_mf8_u8(vdupq_n_u8(0));
5
+ float32x4_t acc = vdupq_n_f32(0.0f);
6
+ acc = vdot_f32_mf8_fpm(acc, a, a, 0);
7
+ return vgetq_lane_f32(acc, 0) == 0.0f ? 0 : 1;
8
+ }
9
+ int main(void) { return test_neonfp8(); }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: SME (Scalable Matrix Extension) */
2
+ #if defined(_WIN32)
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("za") __arm_locally_streaming int test_sme(void) {
11
+ svfloat32_t a = svdup_f32(1.0f);
12
+ svbool_t p = svptrue_b32();
13
+ svmopa_za32_f32_m(0, p, p, a, a);
14
+ return 0;
15
+ }
16
+ int main(void) { return test_sme(); }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: SME2 */
2
+ #if defined(_WIN32)
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME2)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("za") __arm_locally_streaming int test_sme2(void) {
11
+ svfloat32_t a = svdup_f32(1.0f);
12
+ svbool_t p = svptrue_b32();
13
+ svmopa_za32_f32_m(0, p, p, a, a);
14
+ return 0;
15
+ }
16
+ int main(void) { return test_sme2(); }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: SME2P1 */
2
+ #if defined(_WIN32)
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME2)
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("za") __arm_locally_streaming int test_sme2p1(void) {
11
+ svfloat32_t a = svdup_f32(1.0f);
12
+ svbool_t p = svptrue_b32();
13
+ svmopa_za32_f32_m(0, p, p, a, a);
14
+ return 0;
15
+ }
16
+ int main(void) { return test_sme2p1(); }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: SME BF16 (BFloat16 outer product). Compiles only when the guards below pass; the process exit status is test_smebf16()'s return value. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME) /* the compiler must advertise SME */
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("za") __arm_locally_streaming int test_smebf16(void) { /* streaming-mode body with its own ZA state */
11
+ svbfloat16_t a = svdup_bf16(0.0f); /* bf16 vector with every lane set to zero */
12
+ svbool_t p = svptrue_b16(); /* all-true predicate for 16-bit lanes */
13
+ svmopa_za32_bf16_m(0, p, p, a, a); /* bf16 outer product accumulated into 32-bit ZA tile 0; the tile is never read back */
14
+ return 0; /* always reports success once the intrinsic has been issued */
15
+ }
16
+ int main(void) { return test_smebf16(); }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: SME BI32 (boolean/integer 32-bit outer product). Compiles only when the guards below pass; the process exit status is test_smebi32()'s return value. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME2) /* the compiler must advertise SME2 */
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("za") __arm_locally_streaming int test_smebi32(void) { /* streaming-mode body with its own ZA state */
11
+ svuint32_t a = svdup_u32(1);
12
+ svbool_t p = svptrue_b32(); /* all-true predicate for 32-bit lanes */
13
+ svbmopa_za32_u32_m(0, p, p, a, a); /* bitwise (binary) outer product into ZA tile 0; the tile is never read back */
14
+ return 0; /* always reports success once the intrinsic has been issued */
15
+ }
16
+ int main(void) { return test_smebi32(); }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: SME F64 (FEAT_SME_F64F64). Compiles only when the guards below pass; the process exit status is test_smef64()'s return value. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME) /* NOTE(review): only base SME is checked here; F64F64 availability presumably comes from the compiler flags; confirm */
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("za") __arm_locally_streaming int test_smef64(void) { /* streaming-mode body with its own ZA state */
11
+ svfloat64_t a = svdup_f64(1.0);
12
+ svbool_t p = svptrue_b64(); /* all-true predicate for 64-bit lanes */
13
+ svmopa_za64_f64_m(0, p, p, a, a); /* FP64 outer-product accumulate into 64-bit ZA tile 0; the tile is never read back */
14
+ return 0; /* always reports success once the intrinsic has been issued */
15
+ }
16
+ int main(void) { return test_smef64(); }
@@ -0,0 +1,14 @@
1
+ /* NumKong ISA probe: SME FA64 (FEAT_SME_FA64, full SVE2 in streaming mode). Exits 0 when a streaming-mode horizontal add of 1.0f lanes is positive, 1 otherwise. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME) /* only the base SME macro is checked */
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_locally_streaming int test_smefa64(void) { /* streaming mode only; no ZA state is requested */
11
+ svfloat32_t a = svdup_f32(1.0f);
12
+ return (int)svaddv_f32(svptrue_b32(), a) > 0 ? 0 : 1; /* sum of all lanes, truncated to int: 0 = success, 1 = failure */
13
+ }
14
+ int main(void) { return test_smefa64(); }
@@ -0,0 +1,16 @@
1
+ /* NumKong ISA probe: SME F16 (FEAT_SME_F16F16). Compiles only when the guards below pass; the process exit status is test_smehalf()'s return value. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME) /* only the base SME macro is checked */
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("za") __arm_locally_streaming int test_smehalf(void) { /* streaming-mode body with its own ZA state */
11
+ svfloat16_t a = svdup_f16((__fp16)1.0f);
12
+ svbool_t p = svptrue_b16(); /* all-true predicate for 16-bit lanes */
13
+ svmopa_za32_f16_m(0, p, p, a, a); /* f16 inputs accumulated into a 32-bit ZA tile. NOTE(review): confirm this za32 form is the intended check for FEAT_SME_F16F16 */
14
+ return 0; /* always reports success once the intrinsic has been issued */
15
+ }
16
+ int main(void) { return test_smehalf(); }
@@ -0,0 +1,15 @@
1
+ /* NumKong ISA probe: SME LUT2 (FEAT_SME_LUTv2). Exits 0 when the looked-up vector sums to zero, 1 otherwise. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if !defined(__ARM_FEATURE_SME2) /* the compiler must advertise SME2 */
7
+ #error "Feature not available"
8
+ #endif
9
+ #include <arm_sme.h>
10
+ __arm_new("zt0") __arm_locally_streaming int test_smelut2(void) { /* streaming-mode body with its own ZT0 state */
11
+ svuint8_t idx = svdup_u8(0); /* every index selects table entry 0 */
12
+ svuint8_t r = svluti2_lane_zt_u8(0, idx, 0); /* 2-bit-index table lookup through ZT0 */
13
+ return (int)svaddv_u8(svptrue_b8(), r) == 0 ? 0 : 1; /* 0 = success (all looked-up lanes sum to zero), 1 = failure */
14
+ }
15
+ int main(void) { return test_smelut2(); }
@@ -0,0 +1,18 @@
1
+ /* NumKong ISA probe: SVE (Scalable Vector Extension). Exits 0 when a horizontal add of 1.0f lanes is positive, 1 otherwise. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if defined(__APPLE__) && defined(__aarch64__) /* Apple AArch64 targets are rejected at compile time */
7
+ #error "SVE not available on Apple Silicon"
8
+ #endif
9
+
10
+ #if !defined(__ARM_FEATURE_SVE) /* the compiler must advertise SVE */
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <arm_sve.h>
14
+ int test_sve(void) { /* returns the lane sum itself, not a status code */
15
+ svfloat32_t z = svdup_f32(1.0f);
16
+ return (int)svaddv_f32(svptrue_b32(), z); /* sum of all 32-bit lanes, truncated to int */
17
+ }
18
+ int main(void) { return test_sve() > 0 ? 0 : 1; } /* maps a positive sum to exit status 0, anything else to 1 */
@@ -0,0 +1,20 @@
1
+ /* NumKong ISA probe: SVE2. Exits 0 when the lane sum of an elementwise 2*3 product is positive, 1 otherwise. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if defined(__APPLE__) && defined(__aarch64__) /* Apple AArch64 targets are rejected at compile time */
7
+ #error "SVE not available on Apple Silicon"
8
+ #endif
9
+
10
+ #if !defined(__ARM_FEATURE_SVE2) /* the compiler must advertise SVE2 */
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <arm_sve.h>
14
+ int test_sve2(void) {
15
+ svint32_t a = svdup_s32(2);
16
+ svint32_t b = svdup_s32(3);
17
+ svint32_t c = svmul_s32_z(svptrue_b32(), a, b); /* predicated multiply. NOTE(review): SVE2 specificity appears to come from the guard only; confirm */
18
+ return (int)svaddv_s32(svptrue_b32(), c) > 0 ? 0 : 1; /* 0 = success, 1 = failure */
19
+ }
20
+ int main(void) { return test_sve2(); }
@@ -0,0 +1,18 @@
1
+ /* NumKong ISA probe: SVE2P1. NOTE(review): the guard tests __ARM_FEATURE_SVE2 and the body only does a float horizontal add; nothing visible here is SVE2P1-specific, so detection presumably relies on the compiler flags used for this file; confirm. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if defined(__APPLE__) && defined(__aarch64__) /* Apple AArch64 targets are rejected at compile time */
7
+ #error "SVE not available on Apple Silicon"
8
+ #endif
9
+
10
+ #if !defined(__ARM_FEATURE_SVE2) /* checks the SVE2 macro, not an SVE2P1-specific one */
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <arm_sve.h>
14
+ int test_sve2p1(void) {
15
+ svfloat32_t a = svdup_f32(1.0f);
16
+ return (int)svaddv_f32(svptrue_b32(), a) > 0 ? 0 : 1; /* sum of all lanes, truncated to int: 0 = success, 1 = failure */
17
+ }
18
+ int main(void) { return test_sve2p1(); }
@@ -0,0 +1,20 @@
1
+ /* NumKong ISA probe: SVE BF16 (FEAT_BF16 dot-product). Exits 0 when a bf16 dot product of all-zero inputs sums to zero, 1 otherwise. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if defined(__APPLE__) && defined(__aarch64__) /* Apple AArch64 targets are rejected at compile time */
7
+ #error "SVE not available on Apple Silicon"
8
+ #endif
9
+
10
+ #if !defined(__ARM_FEATURE_SVE) /* NOTE(review): only base SVE is checked here; BF16 availability presumably comes from the compiler flags; confirm */
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <arm_sve.h>
14
+ int test_svebfdot(void) {
15
+ svfloat32_t acc = svdup_f32(0.0f);
16
+ svbfloat16_t a = svdup_bf16(0.0f); /* bf16 vector with every lane set to zero */
17
+ acc = svbfdot_f32(acc, a, a); /* bf16 dot product accumulated into the f32 lanes of acc */
18
+ return (int)svaddv_f32(svptrue_b32(), acc) == 0 ? 0 : 1; /* 0 = success (sum is zero), 1 = failure */
19
+ }
20
+ int main(void) { return test_svebfdot(); }
@@ -0,0 +1,18 @@
1
+ /* NumKong ISA probe: SVE F16 (half-precision). Exits 0 when a horizontal add of 1.0 half-precision lanes is positive, 1 otherwise. */
2
+ #if defined(_WIN32) /* Windows targets are rejected at compile time */
3
+ #error "SVE/SME not supported on Windows ARM"
4
+ #endif
5
+
6
+ #if defined(__APPLE__) && defined(__aarch64__) /* Apple AArch64 targets are rejected at compile time */
7
+ #error "SVE not available on Apple Silicon"
8
+ #endif
9
+
10
+ #if !defined(__ARM_FEATURE_SVE) /* the compiler must advertise SVE */
11
+ #error "Feature not available"
12
+ #endif
13
+ #include <arm_sve.h>
14
+ int test_svehalf(void) { /* returns the lane sum itself, not a status code */
15
+ svfloat16_t z = svdup_f16((__fp16)1.0f);
16
+ return (int)svaddv_f16(svptrue_b16(), z); /* sum of all 16-bit lanes, truncated to int */
17
+ }
18
+ int main(void) { return test_svehalf() > 0 ? 0 : 1; } /* maps a positive sum to exit status 0, anything else to 1 */