numkong 7.5.0 → 7.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/binding.gyp +18 -0
  2. package/c/dispatch_e5m2.c +23 -3
  3. package/include/numkong/capabilities.h +1 -1
  4. package/include/numkong/cast/README.md +3 -0
  5. package/include/numkong/cast/haswell.h +28 -64
  6. package/include/numkong/cast/serial.h +17 -0
  7. package/include/numkong/cast/skylake.h +67 -52
  8. package/include/numkong/cast.h +1 -0
  9. package/include/numkong/dot/README.md +1 -0
  10. package/include/numkong/dot/haswell.h +92 -13
  11. package/include/numkong/dot/serial.h +15 -0
  12. package/include/numkong/dot/skylake.h +61 -14
  13. package/include/numkong/dots/README.md +2 -0
  14. package/include/numkong/dots/graniteamx.h +434 -0
  15. package/include/numkong/dots/haswell.h +28 -28
  16. package/include/numkong/dots/sapphireamx.h +1 -1
  17. package/include/numkong/dots/serial.h +23 -8
  18. package/include/numkong/dots/skylake.h +28 -23
  19. package/include/numkong/dots.h +12 -0
  20. package/include/numkong/each/serial.h +18 -1
  21. package/include/numkong/geospatial/serial.h +14 -3
  22. package/include/numkong/maxsim/serial.h +15 -0
  23. package/include/numkong/mesh/README.md +50 -44
  24. package/include/numkong/mesh/genoa.h +462 -0
  25. package/include/numkong/mesh/haswell.h +806 -933
  26. package/include/numkong/mesh/neon.h +871 -943
  27. package/include/numkong/mesh/neonbfdot.h +382 -522
  28. package/include/numkong/mesh/neonfhm.h +676 -0
  29. package/include/numkong/mesh/rvv.h +404 -319
  30. package/include/numkong/mesh/serial.h +204 -162
  31. package/include/numkong/mesh/skylake.h +1029 -1585
  32. package/include/numkong/mesh/v128relaxed.h +403 -377
  33. package/include/numkong/mesh.h +38 -0
  34. package/include/numkong/reduce/serial.h +15 -1
  35. package/include/numkong/sparse/serial.h +17 -2
  36. package/include/numkong/spatial/genoa.h +0 -68
  37. package/include/numkong/spatial/haswell.h +98 -56
  38. package/include/numkong/spatial/serial.h +15 -0
  39. package/include/numkong/spatial/skylake.h +114 -54
  40. package/include/numkong/spatial.h +0 -12
  41. package/include/numkong/spatials/graniteamx.h +128 -0
  42. package/include/numkong/spatials/serial.h +18 -1
  43. package/include/numkong/spatials/skylake.h +2 -2
  44. package/include/numkong/spatials.h +17 -0
  45. package/include/numkong/tensor.hpp +107 -23
  46. package/javascript/numkong.c +3 -2
  47. package/package.json +7 -7
  48. package/wasm/numkong.wasm +0 -0
package/include/numkong/tensor.hpp CHANGED
@@ -1,5 +1,5 @@
1
1
  /**
2
- * @brief NumKong Tensor types and tensor-level operations for C++23 and newer.
2
+ * @brief NumKong Tensor types and tensor-level operations for C++20 and newer.
3
3
  * @file include/numkong/tensor.hpp
4
4
  * @author Ash Vardanian
5
5
  * @date March 2026
@@ -19,7 +19,8 @@
19
19
  * Features:
20
20
  * - Signed strides (ptrdiff_t) for reversed/transposed views
21
21
  * - Signed indexing (negative = from end)
22
- * - C++23 variadic `operator[]` for flat access, exact access, and trailing `slice`
22
+ * - Variadic `operator()` for flat/exact access and trailing `slice` (C++20-portable);
23
+ * `operator[]` multi-arg sugar provided when the compiler supports P2128 (C++23).
23
24
  * - Axis iteration (rows_views(), rows_spans(), axis_iterator)
24
25
  * - Conversion to vector_view/vector_span for rank-1 tensors
25
26
  */
@@ -37,6 +38,14 @@
37
38
 
38
39
  #include "vector.hpp" // `aligned_allocator`
39
40
 
41
+ // True when the compiler supports C++23 P2128 multi-arg `operator[]`. Under
42
+ // this gate we expose `t[a, b, c]` as sugar that delegates to `operator()`.
43
+ #if defined(__cpp_multidimensional_subscript) && __cpp_multidimensional_subscript >= 202110L
44
+ #define NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_ 1
45
+ #else
46
+ #define NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_ 0
47
+ #endif
48
+
40
49
  namespace ashvardanian::numkong {
41
50
 
42
51
  template <typename value_type_, std::size_t max_rank_>
@@ -300,26 +309,44 @@ struct tensor_view {
300
309
  return tensor_flat_lookup_(*this, idx);
301
310
  }
302
311
 
303
- /** @brief Exact multi-dimensional scalar lookup. */
312
+ /** @brief Exact multi-dimensional scalar lookup via call syntax (C++20-portable). */
304
313
  template <std::integral... index_types_>
305
314
  requires(sizeof...(index_types_) >= 2)
306
- decltype(auto) operator[](index_types_... idxs) const noexcept {
315
+ decltype(auto) operator()(index_types_... idxs) const noexcept {
307
316
  nk_assert_(shape_.rank == sizeof...(index_types_));
308
317
  auto coords = resolve_tensor_indices_<value_type_>(shape_, std::index_sequence_for<index_types_...> {},
309
318
  idxs...);
310
319
  return tensor_lookup_resolved_(*this, std::span<std::size_t const, sizeof...(index_types_)>(coords));
311
320
  }
312
321
 
322
+ #if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
323
+ /** @brief C++23 sugar: `t[i, j, k]` scalar lookup, delegates to `operator()`. */
324
+ template <std::integral... index_types_>
325
+ requires(sizeof...(index_types_) >= 2)
326
+ decltype(auto) operator[](index_types_... idxs) const noexcept {
327
+ return (*this)(idxs...);
328
+ }
329
+ #endif
330
+
313
331
  /** @brief Trailing `slice` returns the same view. */
314
332
  constexpr tensor_view operator[](tensor_slice_t) const noexcept { return *this; }
315
333
 
316
- /** @brief Prefix leading-axis slicing with a trailing `slice` marker. */
334
+ /** @brief Prefix leading-axis slicing with a trailing `slice` marker (call syntax, C++20-portable). */
317
335
  template <typename first_type_, typename second_type_, typename... rest_types_>
318
336
  requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
319
- tensor_view operator[](first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
337
+ tensor_view operator()(first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
320
338
  return tensor_slice_suffix_(*this, first, second, rest...);
321
339
  }
322
340
 
341
+ #if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
342
+ /** @brief C++23 sugar: `t[i, nk::slice]` slicing, delegates to `operator()`. */
343
+ template <typename first_type_, typename second_type_, typename... rest_types_>
344
+ requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
345
+ tensor_view operator[](first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
346
+ return (*this)(first, second, rest...);
347
+ }
348
+ #endif
349
+
323
350
  /** @brief Rank-0 scalar access. */
324
351
  decltype(auto) scalar() const noexcept {
325
352
  nk_assert_(shape_.rank == 0);
@@ -512,22 +539,36 @@ struct tensor_span {
512
539
  return tensor_flat_lookup_(static_cast<tensor_view<value_type_, max_rank_>>(*this), idx);
513
540
  }
514
541
 
515
- /** @brief Exact multi-dimensional scalar lookup. */
542
+ /** @brief Exact multi-dimensional scalar lookup via call syntax (C++20-portable). */
516
543
  template <std::integral... index_types_>
517
544
  requires(sizeof...(index_types_) >= 2)
518
- decltype(auto) operator[](index_types_... idxs) noexcept {
545
+ decltype(auto) operator()(index_types_... idxs) noexcept {
519
546
  nk_assert_(shape_.rank == sizeof...(index_types_));
520
547
  auto coords = resolve_tensor_indices_<value_type_>(shape_, std::index_sequence_for<index_types_...> {},
521
548
  idxs...);
522
549
  return tensor_lookup_resolved_(*this, std::span<std::size_t const, sizeof...(index_types_)>(coords));
523
550
  }
524
551
 
525
- /** @brief Const full-coordinate lookup. */
552
+ /** @brief Const full-coordinate lookup via call syntax. */
553
+ template <std::integral... index_types_>
554
+ requires(sizeof...(index_types_) >= 2)
555
+ decltype(auto) operator()(index_types_... idxs) const noexcept {
556
+ return static_cast<tensor_view<value_type_, max_rank_>>(*this)(idxs...);
557
+ }
558
+
559
+ #if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
560
+ /** @brief C++23 sugar: multi-arg `[]` scalar lookup, delegates to `operator()`. */
561
+ template <std::integral... index_types_>
562
+ requires(sizeof...(index_types_) >= 2)
563
+ decltype(auto) operator[](index_types_... idxs) noexcept {
564
+ return (*this)(idxs...);
565
+ }
526
566
  template <std::integral... index_types_>
527
567
  requires(sizeof...(index_types_) >= 2)
528
568
  decltype(auto) operator[](index_types_... idxs) const noexcept {
529
- return static_cast<tensor_view<value_type_, max_rank_>>(*this)[idxs...];
569
+ return (*this)(idxs...);
530
570
  }
571
+ #endif
531
572
 
532
573
  /** @brief Trailing `slice` returns the same span. */
533
574
  constexpr tensor_span operator[](tensor_slice_t) noexcept { return *this; }
@@ -535,21 +576,36 @@ struct tensor_span {
535
576
  return static_cast<tensor_view<value_type_, max_rank_>>(*this);
536
577
  }
537
578
 
538
- /** @brief Prefix leading-axis slicing with a trailing `slice` marker. */
579
+ /** @brief Prefix leading-axis slicing via call syntax (C++20-portable). */
539
580
  template <typename first_type_, typename second_type_, typename... rest_types_>
540
581
  requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
541
- tensor_span operator[](first_type_ first, second_type_ second, rest_types_... rest) noexcept {
582
+ tensor_span operator()(first_type_ first, second_type_ second, rest_types_... rest) noexcept {
542
583
  return tensor_slice_suffix_(*this, first, second, rest...);
543
584
  }
544
585
 
545
- /** @brief Const prefix leading-axis slicing with a trailing `slice` marker. */
586
+ /** @brief Const prefix leading-axis slicing via call syntax. */
546
587
  template <typename first_type_, typename second_type_, typename... rest_types_>
547
588
  requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
548
- tensor_view<value_type_, max_rank_> operator[](first_type_ first, second_type_ second,
589
+ tensor_view<value_type_, max_rank_> operator()(first_type_ first, second_type_ second,
549
590
  rest_types_... rest) const noexcept {
550
591
  return tensor_slice_suffix_(static_cast<tensor_view<value_type_, max_rank_>>(*this), first, second, rest...);
551
592
  }
552
593
 
594
+ #if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
595
+ /** @brief C++23 sugar: multi-arg `[]` slicing, delegates to `operator()`. */
596
+ template <typename first_type_, typename second_type_, typename... rest_types_>
597
+ requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
598
+ tensor_span operator[](first_type_ first, second_type_ second, rest_types_... rest) noexcept {
599
+ return (*this)(first, second, rest...);
600
+ }
601
+ template <typename first_type_, typename second_type_, typename... rest_types_>
602
+ requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
603
+ tensor_view<value_type_, max_rank_> operator[](first_type_ first, second_type_ second,
604
+ rest_types_... rest) const noexcept {
605
+ return (*this)(first, second, rest...);
606
+ }
607
+ #endif
608
+
553
609
  /** @brief Rank-0 mutable scalar access. */
554
610
  decltype(auto) scalar_ref() noexcept {
555
611
  nk_assert_(shape_.rank == 0);
@@ -1546,38 +1602,66 @@ struct tensor {
1546
1602
  return view()[idx];
1547
1603
  }
1548
1604
 
1549
- /** @brief Exact multi-dimensional scalar lookup. */
1605
+ /** @brief Exact multi-dimensional scalar lookup via call syntax (C++20-portable). */
1550
1606
  template <std::integral... index_types_>
1551
1607
  requires(sizeof...(index_types_) >= 2)
1552
- decltype(auto) operator[](index_types_... idxs) noexcept {
1553
- return span()[idxs...];
1608
+ decltype(auto) operator()(index_types_... idxs) noexcept {
1609
+ return span()(idxs...);
1610
+ }
1611
+
1612
+ /** @brief Const multidimensional lookup via call syntax. */
1613
+ template <std::integral... index_types_>
1614
+ requires(sizeof...(index_types_) >= 2)
1615
+ decltype(auto) operator()(index_types_... idxs) const noexcept {
1616
+ return view()(idxs...);
1554
1617
  }
1555
1618
 
1556
- /** @brief Const multidimensional lookup. */
1619
+ #if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
1620
+ /** @brief C++23 sugar: multi-arg `[]` scalar lookup, delegates to `operator()`. */
1621
+ template <std::integral... index_types_>
1622
+ requires(sizeof...(index_types_) >= 2)
1623
+ decltype(auto) operator[](index_types_... idxs) noexcept {
1624
+ return (*this)(idxs...);
1625
+ }
1557
1626
  template <std::integral... index_types_>
1558
1627
  requires(sizeof...(index_types_) >= 2)
1559
1628
  decltype(auto) operator[](index_types_... idxs) const noexcept {
1560
- return view()[idxs...];
1629
+ return (*this)(idxs...);
1561
1630
  }
1631
+ #endif
1562
1632
 
1563
1633
  /** @brief Trailing `slice` returns the same tensor view/span category. */
1564
1634
  span_type operator[](tensor_slice_t) noexcept { return span(); }
1565
1635
  view_type operator[](tensor_slice_t) const noexcept { return view(); }
1566
1636
 
1567
- /** @brief Prefix leading-axis slicing with a trailing `slice` marker. */
1637
+ /** @brief Prefix leading-axis slicing via call syntax (C++20-portable). */
1568
1638
  template <typename first_type_, typename second_type_, typename... rest_types_>
1569
1639
  requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
1570
- span_type operator[](first_type_ first, second_type_ second, rest_types_... rest) noexcept {
1640
+ span_type operator()(first_type_ first, second_type_ second, rest_types_... rest) noexcept {
1571
1641
  return tensor_slice_suffix_(span(), first, second, rest...);
1572
1642
  }
1573
1643
 
1574
- /** @brief Const prefix leading-axis slicing with a trailing `slice` marker. */
1644
+ /** @brief Const prefix leading-axis slicing via call syntax. */
1575
1645
  template <typename first_type_, typename second_type_, typename... rest_types_>
1576
1646
  requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
1577
- view_type operator[](first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
1647
+ view_type operator()(first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
1578
1648
  return tensor_slice_suffix_(view(), first, second, rest...);
1579
1649
  }
1580
1650
 
1651
+ #if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
1652
+ /** @brief C++23 sugar: multi-arg `[]` slicing, delegates to `operator()`. */
1653
+ template <typename first_type_, typename second_type_, typename... rest_types_>
1654
+ requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
1655
+ span_type operator[](first_type_ first, second_type_ second, rest_types_... rest) noexcept {
1656
+ return (*this)(first, second, rest...);
1657
+ }
1658
+ template <typename first_type_, typename second_type_, typename... rest_types_>
1659
+ requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
1660
+ view_type operator[](first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
1661
+ return (*this)(first, second, rest...);
1662
+ }
1663
+ #endif
1664
+
1581
1665
  /** @brief Rank-0 mutable scalar access. */
1582
1666
  decltype(auto) scalar_ref() noexcept { return span().scalar_ref(); }
1583
1667
 
package/javascript/numkong.c CHANGED
@@ -159,9 +159,10 @@ static napi_value dense(napi_env env, napi_callback_info info, nk_kernel_kind_t
159
159
  // Auto-detect from N-API TypedArray type (backward-compatible 4-type whitelist)
160
160
  if (type_a != napi_float64_array && type_a != napi_float32_array && type_a != napi_int8_array &&
161
161
  type_a != napi_uint8_array) {
162
- napi_throw_error(
162
+ napi_throw_error( //
163
163
  env, NULL,
164
- "Only f64, f32, i8, u8 arrays are auto-detected; pass dtype string as 3rd argument " "for other " "types");
164
+ "Only f64, f32, i8, u8 arrays are auto-detected; " //
165
+ "pass dtype string as 3rd argument for other types");
165
166
  return NULL;
166
167
  }
167
168
  switch (type_a) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "numkong",
3
- "version": "7.5.0",
3
+ "version": "7.6.0",
4
4
  "description": "Portable mixed-precision math, linear-algebra, & retrieval library with 2000+ SIMD kernels for x86, Arm, RISC-V, LoongArch, Power, & WebAssembly",
5
5
  "homepage": "https://github.com/ashvardanian/NumKong",
6
6
  "author": "Ash Vardanian",
@@ -98,11 +98,11 @@
98
98
  "printWidth": 120
99
99
  },
100
100
  "optionalDependencies": {
101
- "@numkong/darwin-arm64": "7.5.0",
102
- "@numkong/darwin-x64": "7.5.0",
103
- "@numkong/linux-arm64": "7.5.0",
104
- "@numkong/linux-x64": "7.5.0",
105
- "@numkong/win32-arm64": "7.5.0",
106
- "@numkong/win32-x64": "7.5.0"
101
+ "@numkong/darwin-arm64": "7.6.0",
102
+ "@numkong/darwin-x64": "7.6.0",
103
+ "@numkong/linux-arm64": "7.6.0",
104
+ "@numkong/linux-x64": "7.6.0",
105
+ "@numkong/win32-arm64": "7.6.0",
106
+ "@numkong/win32-x64": "7.6.0"
107
107
  }
108
108
  }
package/wasm/numkong.wasm CHANGED
Binary file