numkong 7.5.0 → 7.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/binding.gyp +18 -0
- package/c/dispatch_e5m2.c +23 -3
- package/include/numkong/capabilities.h +1 -1
- package/include/numkong/cast/README.md +3 -0
- package/include/numkong/cast/haswell.h +28 -64
- package/include/numkong/cast/serial.h +17 -0
- package/include/numkong/cast/skylake.h +67 -52
- package/include/numkong/cast.h +1 -0
- package/include/numkong/dot/README.md +1 -0
- package/include/numkong/dot/haswell.h +92 -13
- package/include/numkong/dot/serial.h +15 -0
- package/include/numkong/dot/skylake.h +61 -14
- package/include/numkong/dots/README.md +2 -0
- package/include/numkong/dots/graniteamx.h +434 -0
- package/include/numkong/dots/haswell.h +28 -28
- package/include/numkong/dots/sapphireamx.h +1 -1
- package/include/numkong/dots/serial.h +23 -8
- package/include/numkong/dots/skylake.h +28 -23
- package/include/numkong/dots.h +12 -0
- package/include/numkong/each/serial.h +18 -1
- package/include/numkong/geospatial/serial.h +14 -3
- package/include/numkong/maxsim/serial.h +15 -0
- package/include/numkong/mesh/README.md +50 -44
- package/include/numkong/mesh/genoa.h +462 -0
- package/include/numkong/mesh/haswell.h +806 -933
- package/include/numkong/mesh/neon.h +871 -943
- package/include/numkong/mesh/neonbfdot.h +382 -522
- package/include/numkong/mesh/neonfhm.h +676 -0
- package/include/numkong/mesh/rvv.h +404 -319
- package/include/numkong/mesh/serial.h +204 -162
- package/include/numkong/mesh/skylake.h +1029 -1585
- package/include/numkong/mesh/v128relaxed.h +403 -377
- package/include/numkong/mesh.h +38 -0
- package/include/numkong/reduce/serial.h +15 -1
- package/include/numkong/sparse/serial.h +17 -2
- package/include/numkong/spatial/genoa.h +0 -68
- package/include/numkong/spatial/haswell.h +98 -56
- package/include/numkong/spatial/serial.h +15 -0
- package/include/numkong/spatial/skylake.h +114 -54
- package/include/numkong/spatial.h +0 -12
- package/include/numkong/spatials/graniteamx.h +128 -0
- package/include/numkong/spatials/serial.h +18 -1
- package/include/numkong/spatials/skylake.h +2 -2
- package/include/numkong/spatials.h +17 -0
- package/include/numkong/tensor.hpp +107 -23
- package/javascript/numkong.c +3 -2
- package/package.json +7 -7
- package/wasm/numkong.wasm +0 -0
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @brief NumKong Tensor types and tensor-level operations for C++
|
|
2
|
+
* @brief NumKong Tensor types and tensor-level operations for C++20 and newer.
|
|
3
3
|
* @file include/numkong/tensor.hpp
|
|
4
4
|
* @author Ash Vardanian
|
|
5
5
|
* @date March 2026
|
|
@@ -19,7 +19,8 @@
|
|
|
19
19
|
* Features:
|
|
20
20
|
* - Signed strides (ptrdiff_t) for reversed/transposed views
|
|
21
21
|
* - Signed indexing (negative = from end)
|
|
22
|
-
* -
|
|
22
|
+
* - Variadic `operator()` for flat/exact access and trailing `slice` (C++20-portable);
|
|
23
|
+
* `operator[]` multi-arg sugar provided when the compiler supports P2128 (C++23).
|
|
23
24
|
* - Axis iteration (rows_views(), rows_spans(), axis_iterator)
|
|
24
25
|
* - Conversion to vector_view/vector_span for rank-1 tensors
|
|
25
26
|
*/
|
|
@@ -37,6 +38,14 @@
|
|
|
37
38
|
|
|
38
39
|
#include "vector.hpp" // `aligned_allocator`
|
|
39
40
|
|
|
41
|
+
// True when the compiler supports C++23 P2128 multi-arg `operator[]`. Under
|
|
42
|
+
// this gate we expose `t[a, b, c]` as sugar that delegates to `operator()`.
|
|
43
|
+
#if defined(__cpp_multidimensional_subscript) && __cpp_multidimensional_subscript >= 202110L
|
|
44
|
+
#define NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_ 1
|
|
45
|
+
#else
|
|
46
|
+
#define NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_ 0
|
|
47
|
+
#endif
|
|
48
|
+
|
|
40
49
|
namespace ashvardanian::numkong {
|
|
41
50
|
|
|
42
51
|
template <typename value_type_, std::size_t max_rank_>
|
|
@@ -300,26 +309,44 @@ struct tensor_view {
|
|
|
300
309
|
return tensor_flat_lookup_(*this, idx);
|
|
301
310
|
}
|
|
302
311
|
|
|
303
|
-
/** @brief Exact multi-dimensional scalar lookup. */
|
|
312
|
+
/** @brief Exact multi-dimensional scalar lookup via call syntax (C++20-portable). */
|
|
304
313
|
template <std::integral... index_types_>
|
|
305
314
|
requires(sizeof...(index_types_) >= 2)
|
|
306
|
-
decltype(auto) operator
|
|
315
|
+
decltype(auto) operator()(index_types_... idxs) const noexcept {
|
|
307
316
|
nk_assert_(shape_.rank == sizeof...(index_types_));
|
|
308
317
|
auto coords = resolve_tensor_indices_<value_type_>(shape_, std::index_sequence_for<index_types_...> {},
|
|
309
318
|
idxs...);
|
|
310
319
|
return tensor_lookup_resolved_(*this, std::span<std::size_t const, sizeof...(index_types_)>(coords));
|
|
311
320
|
}
|
|
312
321
|
|
|
322
|
+
#if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
|
|
323
|
+
/** @brief C++23 sugar: `t[i, j, k]` scalar lookup, delegates to `operator()`. */
|
|
324
|
+
template <std::integral... index_types_>
|
|
325
|
+
requires(sizeof...(index_types_) >= 2)
|
|
326
|
+
decltype(auto) operator[](index_types_... idxs) const noexcept {
|
|
327
|
+
return (*this)(idxs...);
|
|
328
|
+
}
|
|
329
|
+
#endif
|
|
330
|
+
|
|
313
331
|
/** @brief Trailing `slice` returns the same view. */
|
|
314
332
|
constexpr tensor_view operator[](tensor_slice_t) const noexcept { return *this; }
|
|
315
333
|
|
|
316
|
-
/** @brief Prefix leading-axis slicing with a trailing `slice` marker. */
|
|
334
|
+
/** @brief Prefix leading-axis slicing with a trailing `slice` marker (call syntax, C++20-portable). */
|
|
317
335
|
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
318
336
|
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
319
|
-
tensor_view operator
|
|
337
|
+
tensor_view operator()(first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
|
|
320
338
|
return tensor_slice_suffix_(*this, first, second, rest...);
|
|
321
339
|
}
|
|
322
340
|
|
|
341
|
+
#if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
|
|
342
|
+
/** @brief C++23 sugar: `t[i, nk::slice]` slicing, delegates to `operator()`. */
|
|
343
|
+
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
344
|
+
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
345
|
+
tensor_view operator[](first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
|
|
346
|
+
return (*this)(first, second, rest...);
|
|
347
|
+
}
|
|
348
|
+
#endif
|
|
349
|
+
|
|
323
350
|
/** @brief Rank-0 scalar access. */
|
|
324
351
|
decltype(auto) scalar() const noexcept {
|
|
325
352
|
nk_assert_(shape_.rank == 0);
|
|
@@ -512,22 +539,36 @@ struct tensor_span {
|
|
|
512
539
|
return tensor_flat_lookup_(static_cast<tensor_view<value_type_, max_rank_>>(*this), idx);
|
|
513
540
|
}
|
|
514
541
|
|
|
515
|
-
/** @brief Exact multi-dimensional scalar lookup. */
|
|
542
|
+
/** @brief Exact multi-dimensional scalar lookup via call syntax (C++20-portable). */
|
|
516
543
|
template <std::integral... index_types_>
|
|
517
544
|
requires(sizeof...(index_types_) >= 2)
|
|
518
|
-
decltype(auto) operator
|
|
545
|
+
decltype(auto) operator()(index_types_... idxs) noexcept {
|
|
519
546
|
nk_assert_(shape_.rank == sizeof...(index_types_));
|
|
520
547
|
auto coords = resolve_tensor_indices_<value_type_>(shape_, std::index_sequence_for<index_types_...> {},
|
|
521
548
|
idxs...);
|
|
522
549
|
return tensor_lookup_resolved_(*this, std::span<std::size_t const, sizeof...(index_types_)>(coords));
|
|
523
550
|
}
|
|
524
551
|
|
|
525
|
-
/** @brief Const full-coordinate lookup. */
|
|
552
|
+
/** @brief Const full-coordinate lookup via call syntax. */
|
|
553
|
+
template <std::integral... index_types_>
|
|
554
|
+
requires(sizeof...(index_types_) >= 2)
|
|
555
|
+
decltype(auto) operator()(index_types_... idxs) const noexcept {
|
|
556
|
+
return static_cast<tensor_view<value_type_, max_rank_>>(*this)(idxs...);
|
|
557
|
+
}
|
|
558
|
+
|
|
559
|
+
#if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
|
|
560
|
+
/** @brief C++23 sugar: multi-arg `[]` scalar lookup, delegates to `operator()`. */
|
|
561
|
+
template <std::integral... index_types_>
|
|
562
|
+
requires(sizeof...(index_types_) >= 2)
|
|
563
|
+
decltype(auto) operator[](index_types_... idxs) noexcept {
|
|
564
|
+
return (*this)(idxs...);
|
|
565
|
+
}
|
|
526
566
|
template <std::integral... index_types_>
|
|
527
567
|
requires(sizeof...(index_types_) >= 2)
|
|
528
568
|
decltype(auto) operator[](index_types_... idxs) const noexcept {
|
|
529
|
-
return
|
|
569
|
+
return (*this)(idxs...);
|
|
530
570
|
}
|
|
571
|
+
#endif
|
|
531
572
|
|
|
532
573
|
/** @brief Trailing `slice` returns the same span. */
|
|
533
574
|
constexpr tensor_span operator[](tensor_slice_t) noexcept { return *this; }
|
|
@@ -535,21 +576,36 @@ struct tensor_span {
|
|
|
535
576
|
return static_cast<tensor_view<value_type_, max_rank_>>(*this);
|
|
536
577
|
}
|
|
537
578
|
|
|
538
|
-
/** @brief Prefix leading-axis slicing
|
|
579
|
+
/** @brief Prefix leading-axis slicing via call syntax (C++20-portable). */
|
|
539
580
|
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
540
581
|
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
541
|
-
tensor_span operator
|
|
582
|
+
tensor_span operator()(first_type_ first, second_type_ second, rest_types_... rest) noexcept {
|
|
542
583
|
return tensor_slice_suffix_(*this, first, second, rest...);
|
|
543
584
|
}
|
|
544
585
|
|
|
545
|
-
/** @brief Const prefix leading-axis slicing
|
|
586
|
+
/** @brief Const prefix leading-axis slicing via call syntax. */
|
|
546
587
|
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
547
588
|
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
548
|
-
tensor_view<value_type_, max_rank_> operator
|
|
589
|
+
tensor_view<value_type_, max_rank_> operator()(first_type_ first, second_type_ second,
|
|
549
590
|
rest_types_... rest) const noexcept {
|
|
550
591
|
return tensor_slice_suffix_(static_cast<tensor_view<value_type_, max_rank_>>(*this), first, second, rest...);
|
|
551
592
|
}
|
|
552
593
|
|
|
594
|
+
#if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
|
|
595
|
+
/** @brief C++23 sugar: multi-arg `[]` slicing, delegates to `operator()`. */
|
|
596
|
+
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
597
|
+
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
598
|
+
tensor_span operator[](first_type_ first, second_type_ second, rest_types_... rest) noexcept {
|
|
599
|
+
return (*this)(first, second, rest...);
|
|
600
|
+
}
|
|
601
|
+
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
602
|
+
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
603
|
+
tensor_view<value_type_, max_rank_> operator[](first_type_ first, second_type_ second,
|
|
604
|
+
rest_types_... rest) const noexcept {
|
|
605
|
+
return (*this)(first, second, rest...);
|
|
606
|
+
}
|
|
607
|
+
#endif
|
|
608
|
+
|
|
553
609
|
/** @brief Rank-0 mutable scalar access. */
|
|
554
610
|
decltype(auto) scalar_ref() noexcept {
|
|
555
611
|
nk_assert_(shape_.rank == 0);
|
|
@@ -1546,38 +1602,66 @@ struct tensor {
|
|
|
1546
1602
|
return view()[idx];
|
|
1547
1603
|
}
|
|
1548
1604
|
|
|
1549
|
-
/** @brief Exact multi-dimensional scalar lookup. */
|
|
1605
|
+
/** @brief Exact multi-dimensional scalar lookup via call syntax (C++20-portable). */
|
|
1550
1606
|
template <std::integral... index_types_>
|
|
1551
1607
|
requires(sizeof...(index_types_) >= 2)
|
|
1552
|
-
decltype(auto) operator
|
|
1553
|
-
return span()
|
|
1608
|
+
decltype(auto) operator()(index_types_... idxs) noexcept {
|
|
1609
|
+
return span()(idxs...);
|
|
1610
|
+
}
|
|
1611
|
+
|
|
1612
|
+
/** @brief Const multidimensional lookup via call syntax. */
|
|
1613
|
+
template <std::integral... index_types_>
|
|
1614
|
+
requires(sizeof...(index_types_) >= 2)
|
|
1615
|
+
decltype(auto) operator()(index_types_... idxs) const noexcept {
|
|
1616
|
+
return view()(idxs...);
|
|
1554
1617
|
}
|
|
1555
1618
|
|
|
1556
|
-
|
|
1619
|
+
#if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
|
|
1620
|
+
/** @brief C++23 sugar: multi-arg `[]` scalar lookup, delegates to `operator()`. */
|
|
1621
|
+
template <std::integral... index_types_>
|
|
1622
|
+
requires(sizeof...(index_types_) >= 2)
|
|
1623
|
+
decltype(auto) operator[](index_types_... idxs) noexcept {
|
|
1624
|
+
return (*this)(idxs...);
|
|
1625
|
+
}
|
|
1557
1626
|
template <std::integral... index_types_>
|
|
1558
1627
|
requires(sizeof...(index_types_) >= 2)
|
|
1559
1628
|
decltype(auto) operator[](index_types_... idxs) const noexcept {
|
|
1560
|
-
return
|
|
1629
|
+
return (*this)(idxs...);
|
|
1561
1630
|
}
|
|
1631
|
+
#endif
|
|
1562
1632
|
|
|
1563
1633
|
/** @brief Trailing `slice` returns the same tensor view/span category. */
|
|
1564
1634
|
span_type operator[](tensor_slice_t) noexcept { return span(); }
|
|
1565
1635
|
view_type operator[](tensor_slice_t) const noexcept { return view(); }
|
|
1566
1636
|
|
|
1567
|
-
/** @brief Prefix leading-axis slicing
|
|
1637
|
+
/** @brief Prefix leading-axis slicing via call syntax (C++20-portable). */
|
|
1568
1638
|
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
1569
1639
|
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
1570
|
-
span_type operator
|
|
1640
|
+
span_type operator()(first_type_ first, second_type_ second, rest_types_... rest) noexcept {
|
|
1571
1641
|
return tensor_slice_suffix_(span(), first, second, rest...);
|
|
1572
1642
|
}
|
|
1573
1643
|
|
|
1574
|
-
/** @brief Const prefix leading-axis slicing
|
|
1644
|
+
/** @brief Const prefix leading-axis slicing via call syntax. */
|
|
1575
1645
|
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
1576
1646
|
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
1577
|
-
view_type operator
|
|
1647
|
+
view_type operator()(first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
|
|
1578
1648
|
return tensor_slice_suffix_(view(), first, second, rest...);
|
|
1579
1649
|
}
|
|
1580
1650
|
|
|
1651
|
+
#if NK_HAS_MULTIDIMENSIONAL_SUBSCRIPT_
|
|
1652
|
+
/** @brief C++23 sugar: multi-arg `[]` slicing, delegates to `operator()`. */
|
|
1653
|
+
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
1654
|
+
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
1655
|
+
span_type operator[](first_type_ first, second_type_ second, rest_types_... rest) noexcept {
|
|
1656
|
+
return (*this)(first, second, rest...);
|
|
1657
|
+
}
|
|
1658
|
+
template <typename first_type_, typename second_type_, typename... rest_types_>
|
|
1659
|
+
requires(trailing_tensor_slice_args_v<first_type_, second_type_, rest_types_...>)
|
|
1660
|
+
view_type operator[](first_type_ first, second_type_ second, rest_types_... rest) const noexcept {
|
|
1661
|
+
return (*this)(first, second, rest...);
|
|
1662
|
+
}
|
|
1663
|
+
#endif
|
|
1664
|
+
|
|
1581
1665
|
/** @brief Rank-0 mutable scalar access. */
|
|
1582
1666
|
decltype(auto) scalar_ref() noexcept { return span().scalar_ref(); }
|
|
1583
1667
|
|
package/javascript/numkong.c
CHANGED
|
@@ -159,9 +159,10 @@ static napi_value dense(napi_env env, napi_callback_info info, nk_kernel_kind_t
|
|
|
159
159
|
// Auto-detect from N-API TypedArray type (backward-compatible 4-type whitelist)
|
|
160
160
|
if (type_a != napi_float64_array && type_a != napi_float32_array && type_a != napi_int8_array &&
|
|
161
161
|
type_a != napi_uint8_array) {
|
|
162
|
-
napi_throw_error(
|
|
162
|
+
napi_throw_error( //
|
|
163
163
|
env, NULL,
|
|
164
|
-
"Only f64, f32, i8, u8 arrays are auto-detected;
|
|
164
|
+
"Only f64, f32, i8, u8 arrays are auto-detected; " //
|
|
165
|
+
"pass dtype string as 3rd argument for other types");
|
|
165
166
|
return NULL;
|
|
166
167
|
}
|
|
167
168
|
switch (type_a) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "numkong",
|
|
3
|
-
"version": "7.
|
|
3
|
+
"version": "7.6.0",
|
|
4
4
|
"description": "Portable mixed-precision math, linear-algebra, & retrieval library with 2000+ SIMD kernels for x86, Arm, RISC-V, LoongArch, Power, & WebAssembly",
|
|
5
5
|
"homepage": "https://github.com/ashvardanian/NumKong",
|
|
6
6
|
"author": "Ash Vardanian",
|
|
@@ -98,11 +98,11 @@
|
|
|
98
98
|
"printWidth": 120
|
|
99
99
|
},
|
|
100
100
|
"optionalDependencies": {
|
|
101
|
-
"@numkong/darwin-arm64": "7.
|
|
102
|
-
"@numkong/darwin-x64": "7.
|
|
103
|
-
"@numkong/linux-arm64": "7.
|
|
104
|
-
"@numkong/linux-x64": "7.
|
|
105
|
-
"@numkong/win32-arm64": "7.
|
|
106
|
-
"@numkong/win32-x64": "7.
|
|
101
|
+
"@numkong/darwin-arm64": "7.6.0",
|
|
102
|
+
"@numkong/darwin-x64": "7.6.0",
|
|
103
|
+
"@numkong/linux-arm64": "7.6.0",
|
|
104
|
+
"@numkong/linux-x64": "7.6.0",
|
|
105
|
+
"@numkong/win32-arm64": "7.6.0",
|
|
106
|
+
"@numkong/win32-x64": "7.6.0"
|
|
107
107
|
}
|
|
108
108
|
}
|
package/wasm/numkong.wasm
CHANGED
|
Binary file
|