numkong 7.4.1 → 7.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. package/README.md +86 -130
  2. package/binding.gyp +16 -0
  3. package/c/numkong.c +1 -1
  4. package/include/numkong/attention/sapphireamx.h +2 -2
  5. package/include/numkong/attention/sme.h +2 -2
  6. package/include/numkong/capabilities.h +47 -47
  7. package/include/numkong/cast/diamond.h +2 -2
  8. package/include/numkong/cast/haswell.h +2 -2
  9. package/include/numkong/cast/icelake.h +2 -2
  10. package/include/numkong/cast/loongsonasx.h +2 -2
  11. package/include/numkong/cast/neon.h +2 -2
  12. package/include/numkong/cast/powervsx.h +2 -2
  13. package/include/numkong/cast/rvv.h +2 -2
  14. package/include/numkong/cast/sapphire.h +2 -2
  15. package/include/numkong/cast/skylake.h +2 -2
  16. package/include/numkong/curved/genoa.h +2 -2
  17. package/include/numkong/curved/haswell.h +2 -2
  18. package/include/numkong/curved/neon.h +2 -2
  19. package/include/numkong/curved/neonbfdot.h +2 -2
  20. package/include/numkong/curved/rvv.h +2 -2
  21. package/include/numkong/curved/skylake.h +2 -2
  22. package/include/numkong/curved/smef64.h +2 -2
  23. package/include/numkong/dot/alder.h +2 -2
  24. package/include/numkong/dot/diamond.h +2 -2
  25. package/include/numkong/dot/genoa.h +2 -2
  26. package/include/numkong/dot/haswell.h +2 -2
  27. package/include/numkong/dot/icelake.h +2 -2
  28. package/include/numkong/dot/loongsonasx.h +2 -2
  29. package/include/numkong/dot/neon.h +2 -2
  30. package/include/numkong/dot/neonbfdot.h +2 -2
  31. package/include/numkong/dot/neonfhm.h +2 -2
  32. package/include/numkong/dot/neonfp8.h +2 -2
  33. package/include/numkong/dot/neonsdot.h +2 -2
  34. package/include/numkong/dot/rvv.h +2 -2
  35. package/include/numkong/dot/rvvbb.h +2 -2
  36. package/include/numkong/dot/rvvbf16.h +2 -2
  37. package/include/numkong/dot/rvvhalf.h +2 -2
  38. package/include/numkong/dot/sapphire.h +2 -2
  39. package/include/numkong/dot/sierra.h +2 -2
  40. package/include/numkong/dot/skylake.h +2 -2
  41. package/include/numkong/dot/sve.h +2 -2
  42. package/include/numkong/dot/svebfdot.h +2 -2
  43. package/include/numkong/dot/svehalf.h +2 -2
  44. package/include/numkong/dot/svesdot.h +2 -2
  45. package/include/numkong/dots/alder.h +2 -2
  46. package/include/numkong/dots/diamond.h +2 -2
  47. package/include/numkong/dots/genoa.h +2 -2
  48. package/include/numkong/dots/haswell.h +2 -2
  49. package/include/numkong/dots/icelake.h +2 -2
  50. package/include/numkong/dots/loongsonasx.h +2 -2
  51. package/include/numkong/dots/neon.h +2 -2
  52. package/include/numkong/dots/neonbfdot.h +2 -2
  53. package/include/numkong/dots/neonfhm.h +2 -2
  54. package/include/numkong/dots/neonfp8.h +2 -2
  55. package/include/numkong/dots/neonsdot.h +2 -2
  56. package/include/numkong/dots/powervsx.h +2 -2
  57. package/include/numkong/dots/rvv.h +2 -2
  58. package/include/numkong/dots/sapphireamx.h +2 -2
  59. package/include/numkong/dots/sierra.h +2 -2
  60. package/include/numkong/dots/skylake.h +2 -2
  61. package/include/numkong/dots/sme.h +10 -10
  62. package/include/numkong/dots/smebi32.h +2 -2
  63. package/include/numkong/dots/smef64.h +2 -2
  64. package/include/numkong/dots/smehalf.h +2 -2
  65. package/include/numkong/each/haswell.h +2 -2
  66. package/include/numkong/each/icelake.h +2 -2
  67. package/include/numkong/each/neon.h +2 -2
  68. package/include/numkong/each/neonbfdot.h +2 -2
  69. package/include/numkong/each/neonhalf.h +2 -2
  70. package/include/numkong/each/rvv.h +2 -2
  71. package/include/numkong/each/sapphire.h +2 -2
  72. package/include/numkong/each/skylake.h +2 -2
  73. package/include/numkong/geospatial/haswell.h +2 -2
  74. package/include/numkong/geospatial/neon.h +2 -2
  75. package/include/numkong/geospatial/rvv.h +2 -2
  76. package/include/numkong/geospatial/skylake.h +2 -2
  77. package/include/numkong/maxsim/alder.h +2 -2
  78. package/include/numkong/maxsim/genoa.h +2 -2
  79. package/include/numkong/maxsim/haswell.h +2 -2
  80. package/include/numkong/maxsim/icelake.h +2 -2
  81. package/include/numkong/maxsim/neonsdot.h +2 -2
  82. package/include/numkong/maxsim/sapphireamx.h +2 -2
  83. package/include/numkong/maxsim/sme.h +2 -2
  84. package/include/numkong/mesh/haswell.h +2 -2
  85. package/include/numkong/mesh/neon.h +2 -2
  86. package/include/numkong/mesh/neonbfdot.h +2 -2
  87. package/include/numkong/mesh/rvv.h +2 -2
  88. package/include/numkong/mesh/skylake.h +2 -2
  89. package/include/numkong/numkong.h +1 -1
  90. package/include/numkong/probability/haswell.h +2 -2
  91. package/include/numkong/probability/neon.h +2 -2
  92. package/include/numkong/probability/rvv.h +2 -2
  93. package/include/numkong/probability/skylake.h +2 -2
  94. package/include/numkong/reduce/alder.h +2 -2
  95. package/include/numkong/reduce/genoa.h +2 -2
  96. package/include/numkong/reduce/haswell.h +2 -2
  97. package/include/numkong/reduce/icelake.h +2 -2
  98. package/include/numkong/reduce/neon.h +2 -2
  99. package/include/numkong/reduce/neonbfdot.h +2 -2
  100. package/include/numkong/reduce/neonfhm.h +2 -2
  101. package/include/numkong/reduce/neonsdot.h +2 -2
  102. package/include/numkong/reduce/rvv.h +2 -2
  103. package/include/numkong/reduce/sierra.h +2 -2
  104. package/include/numkong/reduce/skylake.h +2 -2
  105. package/include/numkong/scalar/haswell.h +2 -2
  106. package/include/numkong/scalar/loongsonasx.h +2 -2
  107. package/include/numkong/scalar/neon.h +2 -2
  108. package/include/numkong/scalar/neonhalf.h +2 -2
  109. package/include/numkong/scalar/powervsx.h +2 -2
  110. package/include/numkong/scalar/rvv.h +2 -2
  111. package/include/numkong/scalar/sapphire.h +2 -2
  112. package/include/numkong/set/haswell.h +2 -2
  113. package/include/numkong/set/icelake.h +2 -2
  114. package/include/numkong/set/loongsonasx.h +2 -2
  115. package/include/numkong/set/neon.h +2 -2
  116. package/include/numkong/set/powervsx.h +2 -2
  117. package/include/numkong/set/rvv.h +2 -2
  118. package/include/numkong/set/rvvbb.h +2 -2
  119. package/include/numkong/set/sve.h +2 -2
  120. package/include/numkong/sets/haswell.h +2 -2
  121. package/include/numkong/sets/icelake.h +2 -2
  122. package/include/numkong/sets/loongsonasx.h +2 -2
  123. package/include/numkong/sets/neon.h +2 -2
  124. package/include/numkong/sets/powervsx.h +2 -2
  125. package/include/numkong/sets/smebi32.h +2 -2
  126. package/include/numkong/sparse/icelake.h +2 -2
  127. package/include/numkong/sparse/neon.h +2 -2
  128. package/include/numkong/sparse/sve2.h +2 -2
  129. package/include/numkong/sparse/turin.h +2 -2
  130. package/include/numkong/spatial/alder.h +2 -2
  131. package/include/numkong/spatial/diamond.h +2 -2
  132. package/include/numkong/spatial/genoa.h +2 -2
  133. package/include/numkong/spatial/haswell.h +2 -2
  134. package/include/numkong/spatial/icelake.h +2 -2
  135. package/include/numkong/spatial/loongsonasx.h +2 -2
  136. package/include/numkong/spatial/neon.h +2 -2
  137. package/include/numkong/spatial/neonbfdot.h +2 -2
  138. package/include/numkong/spatial/neonfp8.h +2 -2
  139. package/include/numkong/spatial/neonsdot.h +2 -2
  140. package/include/numkong/spatial/powervsx.h +2 -2
  141. package/include/numkong/spatial/rvv.h +2 -2
  142. package/include/numkong/spatial/rvvbf16.h +2 -2
  143. package/include/numkong/spatial/rvvhalf.h +2 -2
  144. package/include/numkong/spatial/sierra.h +2 -2
  145. package/include/numkong/spatial/skylake.h +2 -2
  146. package/include/numkong/spatial/sve.h +2 -2
  147. package/include/numkong/spatial/svebfdot.h +2 -2
  148. package/include/numkong/spatial/svehalf.h +2 -2
  149. package/include/numkong/spatial/svesdot.h +2 -2
  150. package/include/numkong/spatials/alder.h +2 -2
  151. package/include/numkong/spatials/diamond.h +2 -2
  152. package/include/numkong/spatials/genoa.h +2 -2
  153. package/include/numkong/spatials/haswell.h +2 -2
  154. package/include/numkong/spatials/icelake.h +2 -2
  155. package/include/numkong/spatials/loongsonasx.h +2 -2
  156. package/include/numkong/spatials/neon.h +2 -2
  157. package/include/numkong/spatials/neonbfdot.h +2 -2
  158. package/include/numkong/spatials/neonfhm.h +2 -2
  159. package/include/numkong/spatials/neonfp8.h +2 -2
  160. package/include/numkong/spatials/neonsdot.h +2 -2
  161. package/include/numkong/spatials/powervsx.h +2 -2
  162. package/include/numkong/spatials/rvv.h +2 -2
  163. package/include/numkong/spatials/sapphireamx.h +2 -2
  164. package/include/numkong/spatials/sierra.h +2 -2
  165. package/include/numkong/spatials/skylake.h +2 -2
  166. package/include/numkong/spatials/sme.h +2 -2
  167. package/include/numkong/spatials/smef64.h +2 -2
  168. package/include/numkong/trigonometry/haswell.h +2 -2
  169. package/include/numkong/trigonometry/neon.h +2 -2
  170. package/include/numkong/trigonometry/rvv.h +2 -2
  171. package/include/numkong/trigonometry/skylake.h +2 -2
  172. package/include/numkong/types.h +88 -80
  173. package/package.json +7 -7
@@ -96,7 +96,7 @@
96
96
 
97
97
  #define NK_VERSION_MAJOR 7
98
98
  #define NK_VERSION_MINOR 4
99
- #define NK_VERSION_PATCH 1
99
+ #define NK_VERSION_PATCH 3
100
100
 
101
101
  /**
102
102
  * @brief Removes compile-time dispatching, and replaces it with runtime dispatching.
@@ -132,33 +132,33 @@
132
132
  // With `-std=c11` glibc hides `syscall()` behind `_GNU_SOURCE`, but if any
133
133
  // system header was included before us, `<features.h>` is already locked.
134
134
  // Forward-declare `syscall` directly — it always exists in glibc.
135
- #if defined(NK_DEFINED_LINUX_) && (NK_TARGET_X86_ || NK_TARGET_RISCV_)
135
+ #if defined(NK_DEFINED_LINUX_) && (NK_TARGET_X8664_ || NK_TARGET_RISCV64_)
136
136
  #include <sys/syscall.h> // `SYS_arch_prctl`, `SYS_riscv_hwprobe`
137
137
  #ifdef __cplusplus
138
138
  extern "C" long syscall(long, ...) noexcept;
139
139
  #else
140
140
  extern long syscall(long, ...);
141
141
  #endif
142
- #if NK_TARGET_RISCV_
142
+ #if NK_TARGET_RISCV64_
143
143
  #include <sys/auxv.h> // `getauxval`, `AT_HWCAP`
144
144
  #endif
145
145
  #endif
146
146
 
147
- #if defined(NK_DEFINED_LINUX_) && NK_TARGET_LOONGARCH_
147
+ #if defined(NK_DEFINED_LINUX_) && NK_TARGET_LOONGARCH64_
148
148
  #include <sys/auxv.h> // `getauxval`, `AT_HWCAP`
149
149
  #endif
150
150
 
151
- #if defined(NK_DEFINED_LINUX_) && NK_TARGET_POWER_
151
+ #if defined(NK_DEFINED_LINUX_) && NK_TARGET_POWER64_
152
152
  #include <sys/auxv.h> // `getauxval`, `AT_HWCAP`
153
153
  #endif
154
154
 
155
155
  // On FreeBSD RISC-V, we use elf_aux_info for capability detection
156
- #if defined(NK_DEFINED_FREEBSD_) && NK_TARGET_RISCV_
156
+ #if defined(NK_DEFINED_FREEBSD_) && NK_TARGET_RISCV64_
157
157
  #include <sys/auxv.h> // `elf_aux_info`, `AT_HWCAP`
158
158
  #endif
159
159
 
160
160
  // On Windows ARM, we use IsProcessorFeaturePresent API for capability detection
161
- #if defined(NK_DEFINED_WINDOWS_) && NK_TARGET_ARM_
161
+ #if defined(NK_DEFINED_WINDOWS_) && NK_TARGET_ARM64_
162
162
  #include <processthreadsapi.h> // `IsProcessorFeaturePresent`
163
163
  #endif
164
164
 
@@ -388,7 +388,7 @@ typedef void (*nk_kernel_cast_punned_t)(void const *from, nk_dtype_t from_type,
388
388
 
389
389
  typedef void (*nk_kernel_punned_t)(void *);
390
390
 
391
- #if NK_TARGET_X86_
391
+ #if NK_TARGET_X8664_
392
392
 
393
393
  NK_PUBLIC int nk_configure_thread_x86_(nk_capability_t capabilities) {
394
394
  #if NK_TARGET_SAPPHIREAMX
@@ -409,7 +409,7 @@ NK_PUBLIC int nk_configure_thread_x86_(nk_capability_t capabilities) {
409
409
  return 1;
410
410
  }
411
411
 
412
- NK_PUBLIC nk_capability_t nk_capabilities_x86_(void) {
412
+ NK_PUBLIC nk_capability_t nk_capabilities_x8664_(void) {
413
413
  union four_registers_t {
414
414
  int array[4];
415
415
  struct separate_t {
@@ -496,9 +496,9 @@ NK_PUBLIC nk_capability_t nk_capabilities_x86_(void) {
496
496
  (nk_cap_graniteamx_k * supports_graniteamx) | (nk_cap_serial_k));
497
497
  }
498
498
 
499
- #endif // NK_TARGET_X86_
499
+ #endif // NK_TARGET_X8664_
500
500
 
501
- #if NK_TARGET_ARM_
501
+ #if NK_TARGET_ARM64_
502
502
 
503
503
  #if defined(__clang__)
504
504
  #pragma clang attribute push(__attribute__((target("arch=armv8.5-a+sve"))), apply_to = function)
@@ -508,14 +508,14 @@ NK_PUBLIC nk_capability_t nk_capabilities_x86_(void) {
508
508
  #endif
509
509
 
510
510
  #if NK_HAS_POSIX_EXTENSIONS_
511
- static sigjmp_buf nk_mrs_test_jump_buffer_;
512
- static void nk_mrs_test_sigill_handler_(int sig) {
511
+ static sigjmp_buf nk_mrs_arm64_jump_buffer_;
512
+ static void nk_mrs_arm64_sigill_handler_(int sig) {
513
513
  nk_unused_(sig);
514
- siglongjmp(nk_mrs_test_jump_buffer_, 1);
514
+ siglongjmp(nk_mrs_arm64_jump_buffer_, 1);
515
515
  }
516
516
  #endif
517
517
 
518
- NK_PUBLIC int nk_configure_thread_arm_(nk_capability_t capabilities) {
518
+ NK_PUBLIC int nk_configure_thread_arm64_(nk_capability_t capabilities) {
519
519
  #if defined(_MSC_VER)
520
520
  nk_unused_(capabilities);
521
521
  return 1;
@@ -546,7 +546,7 @@ NK_PUBLIC int nk_configure_thread_arm_(nk_capability_t capabilities) {
546
546
 
547
547
  #elif defined(NK_DEFINED_LINUX_) || defined(NK_DEFINED_FREEBSD_)
548
548
  // Read ID registers via MRS. Only safe if MRS is known to work — indicated by
549
- // capabilities beyond basic NEON (nk_capabilities_arm_ validated MRS via sigaction probe).
549
+ // capabilities beyond basic NEON (nk_capabilities_arm64_ validated MRS via sigaction probe).
550
550
  if (capabilities & ~(nk_cap_neon_k | nk_cap_serial_k)) {
551
551
  // FEAT_EBF16: ID_AA64ISAR1_EL1.BF16 bits [47:44] >= 0b0010
552
552
  register unsigned long isar1_val __asm__("x0");
@@ -570,7 +570,7 @@ NK_PUBLIC int nk_configure_thread_arm_(nk_capability_t capabilities) {
570
570
  #endif // _MSC_VER
571
571
  }
572
572
 
573
- NK_PUBLIC nk_capability_t nk_capabilities_arm_(void) {
573
+ NK_PUBLIC nk_capability_t nk_capabilities_arm64_(void) {
574
574
  #if defined(NK_DEFINED_APPLE_)
575
575
  size_t size = sizeof(unsigned);
576
576
  unsigned supports_neon = 0, supports_fp16 = 0, supports_fhm = 0, supports_bf16 = 0, supports_i8mm = 0;
@@ -602,13 +602,13 @@ NK_PUBLIC nk_capability_t nk_capabilities_arm_(void) {
602
602
 
603
603
  #if NK_HAS_POSIX_EXTENSIONS_
604
604
  struct sigaction action_new, action_old;
605
- action_new.sa_handler = nk_mrs_test_sigill_handler_;
605
+ action_new.sa_handler = nk_mrs_arm64_sigill_handler_;
606
606
  sigemptyset(&action_new.sa_mask);
607
607
  action_new.sa_flags = 0;
608
608
 
609
609
  int mrs_works = 0;
610
610
  if (sigaction(SIGILL, &action_new, &action_old) == 0) {
611
- if (sigsetjmp(nk_mrs_test_jump_buffer_, 1) == 0) {
611
+ if (sigsetjmp(nk_mrs_arm64_jump_buffer_, 1) == 0) {
612
612
  register unsigned long midr_value __asm__("x0");
613
613
  __asm__ __volatile__(".inst 0xD5380000" : "=r"(midr_value)); // MRS x0, MIDR_EL1
614
614
  mrs_works = 1;
@@ -722,11 +722,11 @@ NK_PUBLIC nk_capability_t nk_capabilities_arm_(void) {
722
722
  #pragma GCC pop_options
723
723
  #endif
724
724
 
725
- #endif // NK_TARGET_ARM_
725
+ #endif // NK_TARGET_ARM64_
726
726
 
727
- #if NK_TARGET_RISCV_
727
+ #if NK_TARGET_RISCV64_
728
728
 
729
- NK_PUBLIC nk_capability_t nk_capabilities_riscv_(void) {
729
+ NK_PUBLIC nk_capability_t nk_capabilities_riscv64_(void) {
730
730
  #if defined(NK_DEFINED_LINUX_)
731
731
  unsigned long hwcap = getauxval(AT_HWCAP);
732
732
  nk_capability_t caps = nk_cap_serial_k;
@@ -758,11 +758,11 @@ NK_PUBLIC nk_capability_t nk_capabilities_riscv_(void) {
758
758
  #endif
759
759
  }
760
760
 
761
- #endif // NK_TARGET_RISCV_
761
+ #endif // NK_TARGET_RISCV64_
762
762
 
763
- #if NK_TARGET_LOONGARCH_
763
+ #if NK_TARGET_LOONGARCH64_
764
764
 
765
- NK_PUBLIC nk_capability_t nk_capabilities_loongarch_(void) {
765
+ NK_PUBLIC nk_capability_t nk_capabilities_loongarch64_(void) {
766
766
  #if defined(NK_DEFINED_LINUX_)
767
767
  unsigned long hwcap = getauxval(AT_HWCAP);
768
768
  nk_capability_t caps = nk_cap_serial_k;
@@ -774,11 +774,11 @@ NK_PUBLIC nk_capability_t nk_capabilities_loongarch_(void) {
774
774
  #endif
775
775
  }
776
776
 
777
- #endif // NK_TARGET_LOONGARCH_
777
+ #endif // NK_TARGET_LOONGARCH64_
778
778
 
779
- #if NK_TARGET_POWER_
779
+ #if NK_TARGET_POWER64_
780
780
 
781
- NK_PUBLIC nk_capability_t nk_capabilities_power_(void) {
781
+ NK_PUBLIC nk_capability_t nk_capabilities_power64_(void) {
782
782
  #if defined(NK_DEFINED_LINUX_)
783
783
  unsigned long hwcap = getauxval(AT_HWCAP);
784
784
  unsigned long hwcap2 = getauxval(AT_HWCAP2);
@@ -792,7 +792,7 @@ NK_PUBLIC nk_capability_t nk_capabilities_power_(void) {
792
792
  #endif
793
793
  }
794
794
 
795
- #endif // NK_TARGET_POWER_
795
+ #endif // NK_TARGET_POWER64_
796
796
 
797
797
  #if NK_TARGET_WASM_
798
798
 
@@ -826,27 +826,27 @@ NK_PUBLIC nk_capability_t nk_capabilities_v128relaxed_(void) {
826
826
  #endif // NK_TARGET_WASM_
827
827
 
828
828
  NK_PUBLIC int nk_configure_thread_(nk_capability_t capabilities) {
829
- #if NK_TARGET_X86_
829
+ #if NK_TARGET_X8664_
830
830
  return nk_configure_thread_x86_(capabilities);
831
831
  #endif
832
- #if NK_TARGET_ARM_
833
- return nk_configure_thread_arm_(capabilities);
832
+ #if NK_TARGET_ARM64_
833
+ return nk_configure_thread_arm64_(capabilities);
834
834
  #endif
835
835
  nk_unused_(capabilities);
836
836
  return 1; // success — no platform-specific thread configuration needed
837
837
  }
838
838
 
839
839
  NK_PUBLIC nk_capability_t nk_capabilities_(void) {
840
- #if NK_TARGET_X86_
841
- return nk_capabilities_x86_();
842
- #elif NK_TARGET_ARM_
843
- return nk_capabilities_arm_();
844
- #elif NK_TARGET_RISCV_
845
- return nk_capabilities_riscv_();
846
- #elif NK_TARGET_LOONGARCH_
847
- return nk_capabilities_loongarch_();
848
- #elif NK_TARGET_POWER_
849
- return nk_capabilities_power_();
840
+ #if NK_TARGET_X8664_
841
+ return nk_capabilities_x8664_();
842
+ #elif NK_TARGET_ARM64_
843
+ return nk_capabilities_arm64_();
844
+ #elif NK_TARGET_RISCV64_
845
+ return nk_capabilities_riscv64_();
846
+ #elif NK_TARGET_LOONGARCH64_
847
+ return nk_capabilities_loongarch64_();
848
+ #elif NK_TARGET_POWER64_
849
+ return nk_capabilities_power64_();
850
850
  #elif NK_TARGET_WASM_
851
851
  return nk_capabilities_v128relaxed_();
852
852
  #else
@@ -860,7 +860,7 @@ NK_PUBLIC nk_capability_t nk_capabilities_(void) {
860
860
  */
861
861
  NK_PUBLIC nk_capability_t nk_capabilities_compiled_(void) {
862
862
  nk_capability_t caps = nk_cap_serial_k;
863
- #if NK_TARGET_X86_
863
+ #if NK_TARGET_X8664_
864
864
  caps |= nk_cap_haswell_k * NK_TARGET_HASWELL;
865
865
  caps |= nk_cap_skylake_k * NK_TARGET_SKYLAKE;
866
866
  caps |= nk_cap_icelake_k * NK_TARGET_ICELAKE;
@@ -873,7 +873,7 @@ NK_PUBLIC nk_capability_t nk_capabilities_compiled_(void) {
873
873
  caps |= nk_cap_alder_k * NK_TARGET_ALDER;
874
874
  caps |= nk_cap_sierra_k * NK_TARGET_SIERRA;
875
875
  #endif
876
- #if NK_TARGET_ARM_
876
+ #if NK_TARGET_ARM64_
877
877
  caps |= nk_cap_neon_k * NK_TARGET_NEON;
878
878
  caps |= nk_cap_neonhalf_k * NK_TARGET_NEONHALF;
879
879
  caps |= nk_cap_neonsdot_k * NK_TARGET_NEONSDOT;
@@ -896,16 +896,16 @@ NK_PUBLIC nk_capability_t nk_capabilities_compiled_(void) {
896
896
  caps |= nk_cap_smelut2_k * NK_TARGET_SMELUT2;
897
897
  caps |= nk_cap_smefa64_k * NK_TARGET_SMEFA64;
898
898
  #endif
899
- #if NK_TARGET_RISCV_
899
+ #if NK_TARGET_RISCV64_
900
900
  caps |= nk_cap_rvv_k * NK_TARGET_RVV;
901
901
  caps |= nk_cap_rvvhalf_k * NK_TARGET_RVVHALF;
902
902
  caps |= nk_cap_rvvbf16_k * NK_TARGET_RVVBF16;
903
903
  caps |= nk_cap_rvvbb_k * NK_TARGET_RVVBB;
904
904
  #endif
905
- #if NK_TARGET_LOONGARCH_
905
+ #if NK_TARGET_LOONGARCH64_
906
906
  caps |= nk_cap_loongsonasx_k * NK_TARGET_LOONGSONASX;
907
907
  #endif
908
- #if NK_TARGET_POWER_
908
+ #if NK_TARGET_POWER64_
909
909
  caps |= nk_cap_powervsx_k * NK_TARGET_POWERVSX;
910
910
  #endif
911
911
  #if NK_TARGET_WASM_
@@ -12,7 +12,7 @@
12
12
  #ifndef NK_CAST_DIAMOND_H
13
13
  #define NK_CAST_DIAMOND_H
14
14
 
15
- #if NK_TARGET_X86_
15
+ #if NK_TARGET_X8664_
16
16
  #if NK_TARGET_DIAMOND
17
17
 
18
18
  #include "numkong/types.h"
@@ -60,5 +60,5 @@ NK_INTERNAL void nk_partial_load_e5m2x32_to_f16x32_diamond_(nk_e5m2_t const *src
60
60
  #endif
61
61
 
62
62
  #endif // NK_TARGET_DIAMOND
63
- #endif // NK_TARGET_X86_
63
+ #endif // NK_TARGET_X8664_
64
64
  #endif // NK_CAST_DIAMOND_H
@@ -20,7 +20,7 @@
20
20
  #ifndef NK_CAST_HASWELL_H
21
21
  #define NK_CAST_HASWELL_H
22
22
 
23
- #if NK_TARGET_X86_
23
+ #if NK_TARGET_X8664_
24
24
  #if NK_TARGET_HASWELL
25
25
 
26
26
  #include "numkong/types.h"
@@ -819,5 +819,5 @@ NK_PUBLIC void nk_cast_haswell(void const *from, nk_dtype_t from_type, nk_size_t
819
819
  #endif
820
820
 
821
821
  #endif // NK_TARGET_HASWELL
822
- #endif // NK_TARGET_X86_
822
+ #endif // NK_TARGET_X8664_
823
823
  #endif // NK_CAST_HASWELL_H
@@ -19,7 +19,7 @@
19
19
  #ifndef NK_CAST_ICELAKE_H
20
20
  #define NK_CAST_ICELAKE_H
21
21
 
22
- #if NK_TARGET_X86_
22
+ #if NK_TARGET_X8664_
23
23
  #if NK_TARGET_ICELAKE
24
24
 
25
25
  #include "numkong/types.h"
@@ -471,5 +471,5 @@ NK_PUBLIC void nk_cast_icelake(void const *from, nk_dtype_t from_type, nk_size_t
471
471
  #endif
472
472
 
473
473
  #endif // NK_TARGET_ICELAKE
474
- #endif // NK_TARGET_X86_
474
+ #endif // NK_TARGET_X8664_
475
475
  #endif // NK_CAST_ICELAKE_H
@@ -27,7 +27,7 @@
27
27
  #ifndef NK_CAST_LOONGSONASX_H
28
28
  #define NK_CAST_LOONGSONASX_H
29
29
 
30
- #if NK_TARGET_LOONGARCH_
30
+ #if NK_TARGET_LOONGARCH64_
31
31
  #if NK_TARGET_LOONGSONASX
32
32
 
33
33
  #include "numkong/types.h"
@@ -248,5 +248,5 @@ NK_INTERNAL void nk_euclidean_through_u32_from_dot_loongsonasx_(nk_b128_vec_t do
248
248
  #endif
249
249
 
250
250
  #endif // NK_TARGET_LOONGSONASX
251
- #endif // NK_TARGET_LOONGARCH_
251
+ #endif // NK_TARGET_LOONGARCH64_
252
252
  #endif // NK_CAST_LOONGSONASX_H
@@ -49,7 +49,7 @@
49
49
  #ifndef NK_CAST_NEON_H
50
50
  #define NK_CAST_NEON_H
51
51
 
52
- #if NK_TARGET_ARM_
52
+ #if NK_TARGET_ARM64_
53
53
  #if NK_TARGET_NEON
54
54
 
55
55
  #include "numkong/types.h"
@@ -1155,5 +1155,5 @@ NK_PUBLIC void nk_cast_neon(void const *from, nk_dtype_t from_type, nk_size_t n,
1155
1155
  #endif
1156
1156
 
1157
1157
  #endif // NK_TARGET_NEON
1158
- #endif // NK_TARGET_ARM_
1158
+ #endif // NK_TARGET_ARM64_
1159
1159
  #endif // NK_CAST_NEON_H
@@ -66,7 +66,7 @@
66
66
  #ifndef NK_CAST_POWERVSX_H
67
67
  #define NK_CAST_POWERVSX_H
68
68
 
69
- #if NK_TARGET_POWER_
69
+ #if NK_TARGET_POWER64_
70
70
  #if NK_TARGET_POWERVSX
71
71
 
72
72
  #include "numkong/types.h"
@@ -445,5 +445,5 @@ NK_PUBLIC void nk_cast_powervsx(void const *from, nk_dtype_t from_type, nk_size_
445
445
  #endif
446
446
 
447
447
  #endif // NK_TARGET_POWERVSX
448
- #endif // NK_TARGET_POWER_
448
+ #endif // NK_TARGET_POWER64_
449
449
  #endif // NK_CAST_POWERVSX_H
@@ -37,7 +37,7 @@
37
37
  #ifndef NK_CAST_RVV_H
38
38
  #define NK_CAST_RVV_H
39
39
 
40
- #if NK_TARGET_RISCV_
40
+ #if NK_TARGET_RISCV64_
41
41
  #if NK_TARGET_RVV
42
42
 
43
43
  #include "numkong/types.h"
@@ -966,5 +966,5 @@ NK_PUBLIC void nk_cast_rvv(void const *from, nk_dtype_t from_type, nk_size_t cou
966
966
  #endif
967
967
 
968
968
  #endif // NK_TARGET_RVV
969
- #endif // NK_TARGET_RISCV_
969
+ #endif // NK_TARGET_RISCV64_
970
970
  #endif // NK_CAST_RVV_H
@@ -22,7 +22,7 @@
22
22
  #ifndef NK_CAST_SAPPHIRE_H
23
23
  #define NK_CAST_SAPPHIRE_H
24
24
 
25
- #if NK_TARGET_X86_
25
+ #if NK_TARGET_X8664_
26
26
  #if NK_TARGET_SAPPHIRE
27
27
 
28
28
  #include "numkong/types.h"
@@ -258,5 +258,5 @@ NK_PUBLIC void nk_cast_sapphire(void const *from, nk_dtype_t from_type, nk_size_
258
258
  #endif
259
259
 
260
260
  #endif // NK_TARGET_SAPPHIRE
261
- #endif // NK_TARGET_X86_
261
+ #endif // NK_TARGET_X8664_
262
262
  #endif // NK_CAST_SAPPHIRE_H
@@ -23,7 +23,7 @@
23
23
  #ifndef NK_CAST_SKYLAKE_H
24
24
  #define NK_CAST_SKYLAKE_H
25
25
 
26
- #if NK_TARGET_X86_
26
+ #if NK_TARGET_X8664_
27
27
  #if NK_TARGET_SKYLAKE
28
28
 
29
29
  #include "numkong/types.h"
@@ -911,5 +911,5 @@ NK_PUBLIC void nk_cast_skylake(void const *from, nk_dtype_t from_type, nk_size_t
911
911
  #endif
912
912
 
913
913
  #endif // NK_TARGET_SKYLAKE
914
- #endif // NK_TARGET_X86_
914
+ #endif // NK_TARGET_X8664_
915
915
  #endif // NK_CAST_SKYLAKE_H
@@ -11,7 +11,7 @@
11
11
  #ifndef NK_CURVED_GENOA_H
12
12
  #define NK_CURVED_GENOA_H
13
13
 
14
- #if NK_TARGET_X86_
14
+ #if NK_TARGET_X8664_
15
15
  #if NK_TARGET_GENOA
16
16
 
17
17
  #include "numkong/types.h"
@@ -178,5 +178,5 @@ NK_PUBLIC void nk_bilinear_bf16c_genoa(nk_bf16c_t const *a, nk_bf16c_t const *b,
178
178
  #endif
179
179
 
180
180
  #endif // NK_TARGET_GENOA
181
- #endif // NK_TARGET_X86_
181
+ #endif // NK_TARGET_X8664_
182
182
  #endif // NK_CURVED_GENOA_H
@@ -11,7 +11,7 @@
11
11
  #ifndef NK_CURVED_HASWELL_H
12
12
  #define NK_CURVED_HASWELL_H
13
13
 
14
- #if NK_TARGET_X86_
14
+ #if NK_TARGET_X8664_
15
15
  #if NK_TARGET_HASWELL
16
16
 
17
17
  #include "numkong/types.h"
@@ -272,5 +272,5 @@ NK_PUBLIC void nk_mahalanobis_bf16_haswell(nk_bf16_t const *a, nk_bf16_t const *
272
272
  #endif
273
273
 
274
274
  #endif // NK_TARGET_HASWELL
275
- #endif // NK_TARGET_X86_
275
+ #endif // NK_TARGET_X8664_
276
276
  #endif // NK_CURVED_HASWELL_H
@@ -24,7 +24,7 @@
24
24
  #ifndef NK_CURVED_NEON_H
25
25
  #define NK_CURVED_NEON_H
26
26
 
27
- #if NK_TARGET_ARM_
27
+ #if NK_TARGET_ARM64_
28
28
  #if NK_TARGET_NEON
29
29
 
30
30
  #include "numkong/types.h"
@@ -325,5 +325,5 @@ NK_PUBLIC void nk_bilinear_f16c_neon(nk_f16c_t const *a_pairs, nk_f16c_t const *
325
325
  #endif
326
326
 
327
327
  #endif // NK_TARGET_NEON
328
- #endif // NK_TARGET_ARM_
328
+ #endif // NK_TARGET_ARM64_
329
329
  #endif // NK_CURVED_NEON_H
@@ -24,7 +24,7 @@
24
24
  #ifndef NK_CURVED_NEONBFDOT_H
25
25
  #define NK_CURVED_NEONBFDOT_H
26
26
 
27
- #if NK_TARGET_ARM_
27
+ #if NK_TARGET_ARM64_
28
28
  #if NK_TARGET_NEONBFDOT
29
29
 
30
30
  #include "numkong/types.h" // `nk_bf16_t`
@@ -207,5 +207,5 @@ NK_PUBLIC void nk_bilinear_bf16c_neonbfdot(nk_bf16c_t const *a_pairs, nk_bf16c_t
207
207
  #endif
208
208
 
209
209
  #endif // NK_TARGET_NEONBFDOT
210
- #endif // NK_TARGET_ARM_
210
+ #endif // NK_TARGET_ARM64_
211
211
  #endif // NK_CURVED_NEONBFDOT_H
@@ -15,7 +15,7 @@
15
15
  #ifndef NK_CURVED_RVV_H
16
16
  #define NK_CURVED_RVV_H
17
17
 
18
- #if NK_TARGET_RISCV_
18
+ #if NK_TARGET_RISCV64_
19
19
  #if NK_TARGET_RVV
20
20
 
21
21
  #include "numkong/types.h"
@@ -301,5 +301,5 @@ NK_PUBLIC void nk_mahalanobis_bf16_rvv(nk_bf16_t const *a, nk_bf16_t const *b, n
301
301
  #endif
302
302
 
303
303
  #endif // NK_TARGET_RVV
304
- #endif // NK_TARGET_RISCV_
304
+ #endif // NK_TARGET_RISCV64_
305
305
  #endif // NK_CURVED_RVV_H
@@ -13,7 +13,7 @@
13
13
  #ifndef NK_CURVED_SKYLAKE_H
14
14
  #define NK_CURVED_SKYLAKE_H
15
15
 
16
- #if NK_TARGET_X86_
16
+ #if NK_TARGET_X8664_
17
17
  #if NK_TARGET_SKYLAKE
18
18
 
19
19
  #include "numkong/types.h"
@@ -453,5 +453,5 @@ NK_PUBLIC void nk_bilinear_f64c_skylake(nk_f64c_t const *a, nk_f64c_t const *b,
453
453
  #endif
454
454
 
455
455
  #endif // NK_TARGET_SKYLAKE
456
- #endif // NK_TARGET_X86_
456
+ #endif // NK_TARGET_X8664_
457
457
  #endif // NK_CURVED_SKYLAKE_H
@@ -48,7 +48,7 @@
48
48
  #ifndef NK_CURVED_SMEF64_H
49
49
  #define NK_CURVED_SMEF64_H
50
50
 
51
- #if NK_TARGET_ARM_
51
+ #if NK_TARGET_ARM64_
52
52
  #if NK_TARGET_SMEF64
53
53
 
54
54
  #include "numkong/types.h"
@@ -506,5 +506,5 @@ NK_PUBLIC void nk_bilinear_f64c_smef64(nk_f64c_t const *a_pairs, nk_f64c_t const
506
506
  #endif
507
507
 
508
508
  #endif // NK_TARGET_SMEF64
509
- #endif // NK_TARGET_ARM_
509
+ #endif // NK_TARGET_ARM64_
510
510
  #endif // NK_CURVED_SMEF64_H
@@ -80,7 +80,7 @@
80
80
  #ifndef NK_DOT_ALDER_H
81
81
  #define NK_DOT_ALDER_H
82
82
 
83
- #if NK_TARGET_X86_
83
+ #if NK_TARGET_X8664_
84
84
  #if NK_TARGET_ALDER
85
85
 
86
86
  #include "numkong/types.h"
@@ -559,5 +559,5 @@ NK_INTERNAL void nk_dot_e2m3x32_finalize_alder(
559
559
  #endif
560
560
 
561
561
  #endif // NK_TARGET_ALDER
562
- #endif // NK_TARGET_X86_
562
+ #endif // NK_TARGET_X8664_
563
563
  #endif // NK_DOT_ALDER_H
@@ -27,7 +27,7 @@
27
27
  #ifndef NK_DOT_DIAMOND_H
28
28
  #define NK_DOT_DIAMOND_H
29
29
 
30
- #if NK_TARGET_X86_
30
+ #if NK_TARGET_X8664_
31
31
  #if NK_TARGET_DIAMOND
32
32
 
33
33
  #include "numkong/types.h"
@@ -154,5 +154,5 @@ NK_INTERNAL void nk_dot_through_f16_finalize_diamond_(
154
154
  #endif
155
155
 
156
156
  #endif // NK_TARGET_DIAMOND
157
- #endif // NK_TARGET_X86_
157
+ #endif // NK_TARGET_X8664_
158
158
  #endif // NK_DOT_DIAMOND_H
@@ -76,7 +76,7 @@
76
76
  #ifndef NK_DOT_GENOA_H
77
77
  #define NK_DOT_GENOA_H
78
78
 
79
- #if NK_TARGET_X86_
79
+ #if NK_TARGET_X8664_
80
80
  #if NK_TARGET_GENOA
81
81
 
82
82
  #include "numkong/types.h"
@@ -285,5 +285,5 @@ NK_INTERNAL void nk_dot_bf16x32_finalize_genoa(nk_dot_bf16x32_state_genoa_t cons
285
285
  #endif
286
286
 
287
287
  #endif // NK_TARGET_GENOA
288
- #endif // NK_TARGET_X86_
288
+ #endif // NK_TARGET_X8664_
289
289
  #endif // NK_DOT_GENOA_H
@@ -86,7 +86,7 @@
86
86
  #ifndef NK_DOT_HASWELL_H
87
87
  #define NK_DOT_HASWELL_H
88
88
 
89
- #if NK_TARGET_X86_
89
+ #if NK_TARGET_X8664_
90
90
  #if NK_TARGET_HASWELL
91
91
 
92
92
  #include "numkong/types.h"
@@ -1719,5 +1719,5 @@ NK_INTERNAL void nk_dot_u1x128_finalize_haswell( //
1719
1719
  #endif
1720
1720
 
1721
1721
  #endif // NK_TARGET_HASWELL
1722
- #endif // NK_TARGET_X86_
1722
+ #endif // NK_TARGET_X8664_
1723
1723
  #endif // NK_DOT_HASWELL_H
@@ -75,7 +75,7 @@
75
75
  #ifndef NK_DOT_ICELAKE_H
76
76
  #define NK_DOT_ICELAKE_H
77
77
 
78
- #if NK_TARGET_X86_
78
+ #if NK_TARGET_X8664_
79
79
  #if NK_TARGET_ICELAKE
80
80
 
81
81
  #include "numkong/types.h"
@@ -993,5 +993,5 @@ NK_INTERNAL void nk_dot_u1x512_finalize_icelake( //
993
993
  #endif
994
994
 
995
995
  #endif // NK_TARGET_ICELAKE
996
- #endif // NK_TARGET_X86_
996
+ #endif // NK_TARGET_X8664_
997
997
  #endif // NK_DOT_ICELAKE_H
@@ -25,7 +25,7 @@
25
25
  #ifndef NK_DOT_LOONGSONASX_H
26
26
  #define NK_DOT_LOONGSONASX_H
27
27
 
28
- #if NK_TARGET_LOONGARCH_
28
+ #if NK_TARGET_LOONGARCH64_
29
29
  #if NK_TARGET_LOONGSONASX
30
30
 
31
31
  #include "numkong/types.h"
@@ -667,5 +667,5 @@ NK_INTERNAL void nk_dot_u1x256_finalize_loongsonasx(
667
667
  #endif
668
668
 
669
669
  #endif // NK_TARGET_LOONGSONASX
670
- #endif // NK_TARGET_LOONGARCH_
670
+ #endif // NK_TARGET_LOONGARCH64_
671
671
  #endif // NK_DOT_LOONGSONASX_H
@@ -86,7 +86,7 @@
86
86
  #ifndef NK_DOT_NEON_H
87
87
  #define NK_DOT_NEON_H
88
88
 
89
- #if NK_TARGET_ARM_
89
+ #if NK_TARGET_ARM64_
90
90
  #if NK_TARGET_NEON
91
91
 
92
92
  #include "numkong/cast/neon.h" // `nk_e4m3x8_to_f16x8_neon_`
@@ -865,5 +865,5 @@ NK_PUBLIC void nk_vdot_f16c_neon(nk_f16c_t const *a_pairs, nk_f16c_t const *b_pa
865
865
  #endif
866
866
 
867
867
  #endif // NK_TARGET_NEON
868
- #endif // NK_TARGET_ARM_
868
+ #endif // NK_TARGET_ARM64_
869
869
  #endif // NK_DOT_NEON_H
@@ -57,7 +57,7 @@
57
57
  #ifndef NK_DOT_NEONBFDOT_H
58
58
  #define NK_DOT_NEONBFDOT_H
59
59
 
60
- #if NK_TARGET_ARM_
60
+ #if NK_TARGET_ARM64_
61
61
  #if NK_TARGET_NEONBFDOT
62
62
 
63
63
  #include "numkong/types.h"
@@ -239,5 +239,5 @@ NK_INTERNAL void nk_dot_bf16x8_finalize_neonbfdot(
239
239
  #endif
240
240
 
241
241
  #endif // NK_TARGET_NEONBFDOT
242
- #endif // NK_TARGET_ARM_
242
+ #endif // NK_TARGET_ARM64_
243
243
  #endif // NK_DOT_NEONBFDOT_H
@@ -59,7 +59,7 @@
59
59
  #ifndef NK_DOT_NEONFHM_H
60
60
  #define NK_DOT_NEONFHM_H
61
61
 
62
- #if NK_TARGET_ARM_
62
+ #if NK_TARGET_ARM64_
63
63
  #if NK_TARGET_NEONFHM
64
64
 
65
65
  #include "numkong/types.h"
@@ -354,5 +354,5 @@ NK_INTERNAL void nk_dot_e5m2x16_finalize_neonfhm(
354
354
  #endif
355
355
 
356
356
  #endif // NK_TARGET_NEONFHM
357
- #endif // NK_TARGET_ARM_
357
+ #endif // NK_TARGET_ARM64_
358
358
  #endif // NK_DOT_NEONFHM_H