tomoto 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94):
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/ext/tomoto/ct.cpp +1 -1
  5. data/ext/tomoto/dmr.cpp +1 -1
  6. data/ext/tomoto/dt.cpp +1 -1
  7. data/ext/tomoto/extconf.rb +4 -8
  8. data/ext/tomoto/gdmr.cpp +1 -1
  9. data/ext/tomoto/hdp.cpp +1 -1
  10. data/ext/tomoto/hlda.cpp +1 -1
  11. data/ext/tomoto/hpa.cpp +1 -1
  12. data/ext/tomoto/lda.cpp +29 -3
  13. data/ext/tomoto/llda.cpp +1 -1
  14. data/ext/tomoto/mglda.cpp +1 -1
  15. data/ext/tomoto/pa.cpp +1 -1
  16. data/ext/tomoto/plda.cpp +1 -1
  17. data/ext/tomoto/slda.cpp +1 -1
  18. data/lib/tomoto/lda.rb +1 -0
  19. data/lib/tomoto/version.rb +1 -1
  20. data/vendor/EigenRand/EigenRand/Core.h +6 -4
  21. data/vendor/EigenRand/EigenRand/CwiseHeteroBinaryOp.h +265 -0
  22. data/vendor/EigenRand/EigenRand/Dists/Basic.h +345 -12
  23. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +381 -7
  24. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +4 -4
  25. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +576 -4
  26. data/vendor/EigenRand/EigenRand/EigenRand +4 -4
  27. data/vendor/EigenRand/EigenRand/Macro.h +3 -3
  28. data/vendor/EigenRand/EigenRand/MorePacketMath.h +31 -30
  29. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +41 -29
  30. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +19 -7
  31. data/vendor/EigenRand/EigenRand/PacketFilter.h +8 -5
  32. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +3 -3
  33. data/vendor/EigenRand/EigenRand/RandUtils.h +180 -5
  34. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +42 -3
  35. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +3 -3
  36. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +3 -3
  37. data/vendor/EigenRand/EigenRand/arch/AVX512/MorePacketMath.h +312 -0
  38. data/vendor/EigenRand/EigenRand/arch/AVX512/PacketFilter.h +79 -0
  39. data/vendor/EigenRand/EigenRand/arch/AVX512/RandUtils.h +147 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +118 -3
  41. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +3 -3
  42. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +21 -3
  43. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +32 -4
  44. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +3 -3
  45. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +3 -3
  46. data/vendor/EigenRand/EigenRand/doc.h +108 -157
  47. data/vendor/EigenRand/README.md +60 -272
  48. data/vendor/tomotopy/README.kr.rst +27 -5
  49. data/vendor/tomotopy/README.rst +27 -5
  50. data/vendor/tomotopy/README_pypi.rst +583 -0
  51. data/vendor/tomotopy/licenses_bundled/EigenRand +21 -0
  52. data/vendor/tomotopy/src/TopicModel/CT.h +1 -1
  53. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +1 -1
  54. data/vendor/tomotopy/src/TopicModel/DMR.h +1 -1
  55. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +1 -1
  56. data/vendor/tomotopy/src/TopicModel/DT.h +1 -1
  57. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +1 -1
  58. data/vendor/tomotopy/src/TopicModel/GDMR.h +1 -1
  59. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +1 -1
  60. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -1
  61. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +1 -1
  62. data/vendor/tomotopy/src/TopicModel/HLDA.h +1 -1
  63. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +1 -1
  64. data/vendor/tomotopy/src/TopicModel/HPA.h +1 -1
  65. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +1 -1
  66. data/vendor/tomotopy/src/TopicModel/LDA.h +1 -1
  67. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +1 -1
  68. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +6 -6
  69. data/vendor/tomotopy/src/TopicModel/LLDA.h +1 -1
  70. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +1 -1
  71. data/vendor/tomotopy/src/TopicModel/MGLDA.h +1 -1
  72. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +1 -1
  73. data/vendor/tomotopy/src/TopicModel/PA.h +1 -1
  74. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +1 -1
  75. data/vendor/tomotopy/src/TopicModel/PLDA.h +1 -1
  76. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +1 -1
  77. data/vendor/tomotopy/src/TopicModel/PT.h +1 -1
  78. data/vendor/tomotopy/src/TopicModel/PTModel.cpp +1 -1
  79. data/vendor/tomotopy/src/TopicModel/SLDA.h +1 -1
  80. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +1 -1
  81. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +6 -6
  82. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +41 -0
  83. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +6 -6
  84. data/vendor/tomotopy/src/Utils/Utils.hpp +3 -3
  85. data/vendor/tomotopy/src/Utils/avx512_gamma.h +46 -0
  86. data/vendor/tomotopy/src/Utils/avx512_mathfun.h +99 -0
  87. metadata +10 -9
  88. data/vendor/variant/LICENSE +0 -25
  89. data/vendor/variant/LICENSE_1_0.txt +0 -23
  90. data/vendor/variant/README.md +0 -102
  91. data/vendor/variant/include/mapbox/optional.hpp +0 -74
  92. data/vendor/variant/include/mapbox/recursive_wrapper.hpp +0 -122
  93. data/vendor/variant/include/mapbox/variant.hpp +0 -974
  94. data/vendor/variant/include/mapbox/variant_io.hpp +0 -45
@@ -0,0 +1,312 @@
1
+ /**
2
+ * @file MorePacketMath.h
3
+ * @author bab2min (bab2min@gmail.com)
4
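+ * @brief AVX-512 (Packet16i / Packet16f / Packet8d) specializations of EigenRand's additional packet math helpers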
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
+ *
8
+ * @copyright Copyright (c) 2020-2024
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MORE_PACKET_MATH_AVX512_H
13
+ #define EIGENRAND_MORE_PACKET_MATH_AVX512_H
14
+
15
+ #include <immintrin.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
+ template<> // Trait specialization: flags Packet16i as an integer packet (inherits std::true_type).
22
+ struct IsIntPacket<Packet16i> : std::true_type {};
23
+
24
+ template<> // Trait specialization: the half-packet type associated with Packet16i.
25
+ struct HalfPacket<Packet16i>
26
+ {
27
+ using type = Packet8i; // Packet8i is exposed as the half-size counterpart.
28
+ };
29
+
30
+ template<> // Trait specialization: the half-packet type associated with Packet16f.
31
+ struct HalfPacket<Packet16f>
32
+ {
33
+ using type = Packet8f; // Packet8f is exposed as the half-size counterpart.
34
+ };
35
+
36
+ template<> // Trait specialization: flags Packet16f as a float packet (inherits std::true_type).
37
+ struct IsFloatPacket<Packet16f> : std::true_type {};
38
+
39
+ template<> // Trait specialization: flags Packet8d as a double packet (inherits std::true_type).
40
+ struct IsDoublePacket<Packet8d> : std::true_type {};
41
+
42
+ template<> // Reinterprets a 512-bit integer packet as a float, double or int packet via cast intrinsics.
43
+ struct reinterpreter<Packet16i>
44
+ {
45
+ EIGEN_STRONG_INLINE Packet16f to_float(const Packet16i& x) // View x as 16 floats.
46
+ {
47
+ return _mm512_castsi512_ps(x);
48
+ }
49
+
50
+ EIGEN_STRONG_INLINE Packet8d to_double(const Packet16i& x) // View x as 8 doubles.
51
+ {
52
+ return _mm512_castsi512_pd(x);
53
+ }
54
+
55
+ EIGEN_STRONG_INLINE Packet16i to_int(const Packet16i& x) // Already an int packet: returned unchanged.
56
+ {
57
+ return x;
58
+ }
59
+ };
60
+
61
+ template<> // Reinterprets a 512-bit float packet as a float, double or int packet via cast intrinsics.
62
+ struct reinterpreter<Packet16f>
63
+ {
64
+ EIGEN_STRONG_INLINE Packet16f to_float(const Packet16f& x) // Already a float packet: returned unchanged.
65
+ {
66
+ return x;
67
+ }
68
+
69
+ EIGEN_STRONG_INLINE Packet8d to_double(const Packet16f& x) // View x as 8 doubles.
70
+ {
71
+ return _mm512_castps_pd(x);
72
+ }
73
+
74
+ EIGEN_STRONG_INLINE Packet16i to_int(const Packet16f& x) // View x as a 512-bit integer packet.
75
+ {
76
+ return _mm512_castps_si512(x);
77
+ }
78
+ };
79
+
80
+ template<> // Reinterprets a 512-bit double packet as a float, double or int packet via cast intrinsics.
81
+ struct reinterpreter<Packet8d>
82
+ {
83
+ EIGEN_STRONG_INLINE Packet16f to_float(const Packet8d& x) // View x as 16 floats.
84
+ {
85
+ return _mm512_castpd_ps(x);
86
+ }
87
+
88
+ EIGEN_STRONG_INLINE Packet8d to_double(const Packet8d& x) // Already a double packet: returned unchanged.
89
+ {
90
+ return x;
91
+ }
92
+
93
+ EIGEN_STRONG_INLINE Packet16i to_int(const Packet8d& x) // View x as a 512-bit integer packet.
94
+ {
95
+ return _mm512_castpd_si512(x);
96
+ }
97
+ };
98
+
99
+ template<> // Fills a Packet16i with the 64-bit value `a` repeated in every 64-bit lane.
100
+ EIGEN_STRONG_INLINE Packet16i pseti64<Packet16i>(uint64_t a)
101
+ {
102
+ return _mm512_set1_epi64(a);
103
+ }
104
+
105
+ template<> // Lane-wise addition treating both packets as 64-bit integer lanes.
106
+ EIGEN_STRONG_INLINE Packet16i padd64<Packet16i>(const Packet16i& a, const Packet16i& b)
107
+ {
108
+ return _mm512_add_epi64(a, b);
109
+ }
110
+
111
+ template<> // Lane-wise subtraction (a - b) treating both packets as 64-bit integer lanes.
112
+ EIGEN_STRONG_INLINE Packet16i psub64<Packet16i>(const Packet16i& a, const Packet16i& b)
113
+ {
114
+ return _mm512_sub_epi64(a, b);
115
+ }
116
+
117
+ template<> // Equality comparison for Packet16i; simply forwards to pcmp_eq.
118
+ EIGEN_STRONG_INLINE Packet16i pcmpeq<Packet16i>(const Packet16i& a, const Packet16i& b)
119
+ {
120
+ return pcmp_eq(a, b);
121
+ }
122
+
123
+ template<> // Equality comparison for Packet16f; simply forwards to pcmp_eq.
124
+ EIGEN_STRONG_INLINE Packet16f pcmpeq<Packet16f>(const Packet16f& a, const Packet16f& b)
125
+ {
126
+ return pcmp_eq(a, b);
127
+ }
128
+
129
+ template<> // Negates each 32-bit integer lane by computing 0 - a.
130
+ EIGEN_STRONG_INLINE Packet16i pnegate<Packet16i>(const Packet16i& a)
131
+ {
132
+ return _mm512_sub_epi32(pset1<Packet16i>(0), a);
133
+ }
134
+
135
+ template<> // Logical shift helpers for Packet16i, on 32-bit lanes (sll/srl) and 64-bit lanes (sll64/srl64).
136
+ struct BitShifter<Packet16i>
137
+ {
138
+ template<int b>
139
+ EIGEN_STRONG_INLINE Packet16i sll(const Packet16i& a) // Shift each 32-bit lane left by the compile-time count b.
140
+ {
141
+ return _mm512_slli_epi32(a, b);
142
+ }
143
+
144
+ template<int b>
145
+ EIGEN_STRONG_INLINE Packet16i srl(const Packet16i& a, int _b = b) // Shift each 32-bit lane right; _b defaults to b.
146
+ {
147
+ if (b >= 0) // Non-negative template count: the runtime argument _b is ignored.
148
+ {
149
+ return _mm512_srli_epi32(a, b);
150
+ }
151
+ else // Negative template count: the runtime argument _b supplies the shift amount.
152
+ {
153
+ return _mm512_srli_epi32(a, _b);
154
+ }
155
+ }
156
+
157
+ template<int b>
158
+ EIGEN_STRONG_INLINE Packet16i sll64(const Packet16i& a) // Shift each 64-bit lane left by b.
159
+ {
160
+ return _mm512_slli_epi64(a, b);
161
+ }
162
+
163
+ template<int b>
164
+ EIGEN_STRONG_INLINE Packet16i srl64(const Packet16i& a) // Shift each 64-bit lane right by b.
165
+ {
166
+ return _mm512_srli_epi64(a, b);
167
+ }
168
+ };
169
+
170
+ template<> EIGEN_STRONG_INLINE bool predux_all(const Packet16i& x) // True when all 16 per-lane mask bits of x are set.
171
+ {
172
+ return _mm512_movepi32_mask(x) == 0xFFFF; // One mask bit per 32-bit lane; 0xFFFF means every lane contributed a 1.
173
+ }
174
+
175
+ template<> EIGEN_STRONG_INLINE bool predux_all(const Packet16f& x) // Float overload: reuses the Packet16i version on the same bits.
176
+ {
177
+ return predux_all(_mm512_castps_si512(x));
178
+ }
179
+
180
+ template<> // Lane-wise a < b on 32-bit integers, returned as a packet rather than a mask register.
181
+ EIGEN_STRONG_INLINE Packet16i pcmplt<Packet16i>(const Packet16i& a, const Packet16i& b)
182
+ {
183
+ __mmask16 mask = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT); // 16-bit mask, one bit per lane.
184
+ return _mm512_movm_epi32(mask); // Expand the mask bits back into 32-bit lanes.
185
+ }
186
+
187
+ template<> // Less-than comparison for Packet16f; simply forwards to pcmp_lt.
188
+ EIGEN_STRONG_INLINE Packet16f pcmplt<Packet16f>(const Packet16f& a, const Packet16f& b)
189
+ {
190
+ return pcmp_lt(a, b);
191
+ }
192
+
193
+ template<> // Less-or-equal comparison for Packet16f; simply forwards to pcmp_le.
194
+ EIGEN_STRONG_INLINE Packet16f pcmple<Packet16f>(const Packet16f& a, const Packet16f& b)
195
+ {
196
+ return pcmp_le(a, b);
197
+ }
198
+
199
+ template<> // Less-than comparison for Packet8d; simply forwards to pcmp_lt.
200
+ EIGEN_STRONG_INLINE Packet8d pcmplt<Packet8d>(const Packet8d& a, const Packet8d& b)
201
+ {
202
+ return pcmp_lt(a, b);
203
+ }
204
+ template<> // Less-or-equal comparison for Packet8d; simply forwards to pcmp_le.
205
+ EIGEN_STRONG_INLINE Packet8d pcmple<Packet8d>(const Packet8d& a, const Packet8d& b)
206
+ {
207
+ return pcmp_le(a, b);
208
+ }
209
+
210
+ template<> // Per-lane select on floats: thenPacket where the lane's mask bit from ifPacket is set, elsePacket otherwise.
211
+ EIGEN_STRONG_INLINE Packet16f pblendv(const Packet16i& ifPacket, const Packet16f& thenPacket, const Packet16f& elsePacket)
212
+ {
213
+ __mmask16 mask = _mm512_movepi32_mask(ifPacket); // One mask bit per 32-bit lane of the condition.
214
+ return _mm512_mask_blend_ps(mask, elsePacket, thenPacket); // Argument order: (mask, value-if-clear, value-if-set).
215
+ }
216
+
217
+ template<> // Float-condition overload: reinterprets ifPacket as integers and defers to the Packet16i-condition overload.
218
+ EIGEN_STRONG_INLINE Packet16f pblendv(const Packet16f& ifPacket, const Packet16f& thenPacket, const Packet16f& elsePacket)
219
+ {
220
+ return pblendv(_mm512_castps_si512(ifPacket), thenPacket, elsePacket);
221
+ }
222
+
223
+ template<> // Per-lane select on 32-bit integers: thenPacket where the lane's mask bit is set, elsePacket otherwise.
224
+ EIGEN_STRONG_INLINE Packet16i pblendv(const Packet16i& ifPacket, const Packet16i& thenPacket, const Packet16i& elsePacket)
225
+ {
226
+ __mmask16 mask = _mm512_movepi32_mask(ifPacket); // One mask bit per 32-bit lane of the condition.
227
+ return _mm512_mask_blend_epi32(mask, elsePacket, thenPacket); // Argument order: (mask, value-if-clear, value-if-set).
228
+ }
229
+
230
+ template<> // Per-lane select on doubles; the condition is read as eight 64-bit lanes.
231
+ EIGEN_STRONG_INLINE Packet8d pblendv(const Packet16i& ifPacket, const Packet8d& thenPacket, const Packet8d& elsePacket)
232
+ {
233
+ __mmask8 mask = _mm512_movepi64_mask(ifPacket); // One mask bit per 64-bit lane of the condition.
234
+ return _mm512_mask_blend_pd(mask, elsePacket, thenPacket); // Argument order: (mask, value-if-clear, value-if-set).
235
+ }
236
+
237
+ template<> // Double-condition overload: reinterprets ifPacket as integers and defers to the Packet16i-condition overload.
238
+ EIGEN_STRONG_INLINE Packet8d pblendv(const Packet8d& ifPacket, const Packet8d& thenPacket, const Packet8d& elsePacket)
239
+ {
240
+ return pblendv(_mm512_castpd_si512(ifPacket), thenPacket, elsePacket);
241
+ }
242
+
243
+ template<> // Gathers 16 ints from addr using the 32-bit indices in `index`.
244
+ EIGEN_STRONG_INLINE Packet16i pgather<Packet16i>(const int* addr, const Packet16i& index)
245
+ {
246
+ return _mm512_i32gather_epi32(index, addr, 4); // Scale 4: indices are multiplied by 4 bytes.
247
+ }
248
+
249
+ template<> // Gathers 16 floats from addr using the 32-bit indices in `index`.
250
+ EIGEN_STRONG_INLINE Packet16f pgather<Packet16i>(const float* addr, const Packet16i& index)
251
+ {
252
+ return _mm512_i32gather_ps(index, addr, 4); // Scale 4: indices are multiplied by 4 bytes.
253
+ }
254
+
255
+ template<> // Gathers 8 doubles from addr using 32-bit indices taken from the low 256 bits of `index`.
256
+ EIGEN_STRONG_INLINE Packet8d pgather<Packet16i>(const double* addr, const Packet16i& index, bool upperhalf)
257
+ { // NOTE(review): `upperhalf` is never read in this body; only the low 8 indices are used -- confirm this is intended.
258
+ return _mm512_i32gather_pd(_mm512_castsi512_si256(index), addr, 8); // Scale 8: indices are multiplied by 8 bytes.
259
+ }
260
+
261
+ template<> // Rounds each float lane toward zero.
262
+ EIGEN_STRONG_INLINE Packet16f ptruncate<Packet16f>(const Packet16f& a)
263
+ {
264
+ return _mm512_roundscale_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); // Round-to-zero mode, exceptions suppressed.
265
+ }
266
+
267
+ template<> // Rounds each double lane toward zero.
268
+ EIGEN_STRONG_INLINE Packet8d ptruncate<Packet8d>(const Packet8d& a)
269
+ {
270
+ return _mm512_roundscale_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); // Round-to-zero mode, exceptions suppressed.
271
+ }
272
+
273
+ template<> // Lane-wise equality on 64-bit integer lanes, returned as a packet rather than a mask register.
274
+ EIGEN_STRONG_INLINE Packet16i pcmpeq64<Packet16i>(const Packet16i& a, const Packet16i& b)
275
+ {
276
+ __mmask8 mask = _mm512_cmp_epi64_mask(a, b, _MM_CMPINT_EQ); // 8-bit mask, one bit per 64-bit lane.
277
+ return _mm512_movm_epi64(mask); // Expand the mask bits back into 64-bit lanes.
278
+ }
279
+
280
+ EIGEN_STRONG_INLINE __m512d int64_to_double_avx512(__m512i x) { // Converts 64-bit integer lanes to doubles using a magic-constant add/subtract.
281
+ x = padd64(x, _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000))); // Integer-add the bit pattern of the double 2^52 + 2^51 (the literal's numeric value).
282
+ return _mm512_sub_pd(_mm512_castsi512_pd(x), _mm512_set1_pd(0x0018000000000000)); // Reinterpret as double and subtract the same constant. NOTE(review): presumably only exact for a limited input range -- confirm.
283
+ }
284
+
285
+ EIGEN_STRONG_INLINE __m512i double_to_int64_avx512(__m512d x) { // Converts double lanes to 64-bit integers: floor first, then a magic-constant add/subtract.
286
+ x = _mm512_add_pd(_mm512_floor_pd(x), _mm512_set1_pd(0x0018000000000000)); // floor(x) plus the double 2^52 + 2^51.
287
+ return psub64( // 64-bit integer subtraction of the constant's bit pattern from the sum's bit pattern.
288
+ _mm512_castpd_si512(x),
289
+ _mm512_castpd_si512(_mm512_set1_pd(0x0018000000000000))
290
+ ); // NOTE(review): presumably only valid for a limited input range -- confirm.
291
+ }
292
+ template<> // double -> 64-bit integer lanes; forwards to double_to_int64_avx512.
293
+ EIGEN_STRONG_INLINE Packet16i pcast64<Packet8d, Packet16i>(const Packet8d& a)
294
+ {
295
+ return double_to_int64_avx512(a);
296
+ }
297
+
298
+ template<> // 64-bit integer lanes -> double; forwards to int64_to_double_avx512.
299
+ EIGEN_STRONG_INLINE Packet8d pcast64<Packet16i, Packet8d>(const Packet16i& a)
300
+ {
301
+ return int64_to_double_avx512(a);
302
+ }
303
+
304
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED // psin for Packet8d; delegates to _psin, which is defined outside this hunk.
305
+ Packet8d psin<Packet8d>(const Packet8d& x)
306
+ {
307
+ return _psin(x);
308
+ }
309
+ }
310
+ }
311
+
312
+ #endif
@@ -0,0 +1,79 @@
1
+ /**
2
+ * @file PacketFilter.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief AVX-512 specialization of EigenRand's CompressMask packet filter (16 x 32-bit lanes)
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
+ *
8
+ * @copyright Copyright (c) 2020-2024
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_PACKET_FILTER_AVX512_H
13
+ #define EIGENRAND_PACKET_FILTER_AVX512_H
14
+
15
+ #include <immintrin.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace Rand
20
+ {
21
+ namespace detail
22
+ {
23
+ template<> // CompressMask specialization for 64 (16 lanes of 32 bits, see full_size); obtained through get_inst().
24
+ class CompressMask<64>
25
+ {
26
+ CompressMask() {} // Private constructor: instances are only created inside get_inst().
27
+
28
+ public:
29
+ enum { full_size = 16 }; // Number of 32-bit lanes in one packet.
30
+ static const CompressMask& get_inst() // Returns the function-local static instance.
31
+ {
32
+ static CompressMask cm;
33
+ return cm;
34
+ }
35
+
36
+ template<typename Packet> // Packs the lanes of _value selected by _mask together with the lanes carried in _rest.
37
+ EIGEN_STRONG_INLINE int compress_append(Packet& _value, const Packet& _mask,
38
+ Packet& _rest, int rest_cnt, bool& full) const // Returns the updated carried-lane count; `full` reports whether _value holds a complete packet.
39
+ {
40
+ auto& value = reinterpret_cast<internal::Packet16f&>(_value); // All three packets are handled as Packet16f regardless of Packet.
41
+ auto& mask = reinterpret_cast<const internal::Packet16f&>(_mask);
42
+ auto& rest = reinterpret_cast<internal::Packet16f&>(_rest);
43
+
44
+ const __mmask16 m = _mm512_movepi32_mask(_mm512_castps_si512(mask)); // One bit per 32-bit lane of the mask packet.
45
+
46
+ if (m == 0xFFFF) // Every lane selected: _value and _rest are left untouched.
47
+ {
48
+ full = true;
49
+ return rest_cnt;
50
+ }
51
+
52
+ const int cnt_m = _mm_popcnt_u32(m); // Number of selected lanes.
53
+
54
+ const __m512i counting = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
55
+ __m512i rotate = _mm512_sub_epi32(counting, _mm512_set1_epi32(cnt_m)); // Per-lane source index: lane number minus cnt_m.
56
+ __m512 rot_rest = _mm512_permutexvar_ps(rotate, rest); // rest permuted by those indices (shifted up by cnt_m lanes; presumably wrapping around -- confirm).
57
+
58
+ __m512 p1 = _mm512_mask_compress_ps(rot_rest, m, value); // Selected lanes of value packed at the low end; remaining lanes come from rot_rest.
59
+
60
+ auto new_cnt = rest_cnt + cnt_m; // Carried lanes plus newly selected lanes.
61
+ if (new_cnt >= full_size) // Enough lanes for a complete packet: emit it through _value.
62
+ {
63
+ rest = rot_rest;
64
+ value = p1;
65
+ full = true;
66
+ return new_cnt - full_size; // Lanes left over after emitting one full packet.
67
+ }
68
+ else // Not enough yet: keep accumulating in _rest.
69
+ {
70
+ rest = p1;
71
+ full = false;
72
+ return new_cnt;
73
+ }
74
+ }
75
+ };
76
+ }
77
+ }
78
+ }
79
+ #endif
@@ -0,0 +1,147 @@
1
+ /**
2
+ * @file RandUtils.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief AVX-512 specializations of EigenRand's raw-bit and uniform-real generation utilities
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
+ *
8
+ * @copyright Copyright (c) 2020-2024
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_RAND_UTILS_AVX512_H
13
+ #define EIGENRAND_RAND_UTILS_AVX512_H
14
+
15
+ #include <immintrin.h>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
+ template<typename Rng> // Packet8i output from a packet engine whose result type is Packet16i: every method returns rng.half().
22
+ struct RawbitsMaker<Packet8i, Rng, Packet16i, Rand::RandomEngineType::packet>
23
+ {
24
+ EIGEN_STRONG_INLINE Packet8i rawbits(Rng& rng)
25
+ {
26
+ return rng.half();
27
+ }
28
+
29
+ EIGEN_STRONG_INLINE Packet8i rawbits_34(Rng& rng) // Same as rawbits in this specialization.
30
+ {
31
+ return rng.half();
32
+ }
33
+
34
+ EIGEN_STRONG_INLINE Packet8i rawbits_half(Rng& rng) // Same as rawbits in this specialization.
35
+ {
36
+ return rng.half();
37
+ }
38
+ };
39
+
40
+ template<typename Rng> // Packet16i output from a packet engine whose result type is Packet8i: two draws are joined into one 512-bit packet.
41
+ struct RawbitsMaker<Packet16i, Rng, Packet8i, Rand::RandomEngineType::packet>
42
+ {
43
+ EIGEN_STRONG_INLINE Packet16i rawbits(Rng& rng)
44
+ {
45
+ return _mm512_inserti64x4(_mm512_castsi256_si512(rng()), rng(), 1); // One draw fills the low 256 bits, the other the high 256 bits. NOTE(review): both rng() calls sit in one argument list, so which draw happens first is unspecified -- confirm this is acceptable for reproducibility.
46
+ }
47
+
48
+ EIGEN_STRONG_INLINE Packet16i rawbits_34(Rng& rng) // Same construction as rawbits.
49
+ {
50
+ return _mm512_inserti64x4(_mm512_castsi256_si512(rng()), rng(), 1);
51
+ }
52
+
53
+ EIGEN_STRONG_INLINE Packet8i rawbits_half(Rng& rng) // A single engine draw is already a half packet.
54
+ {
55
+ return rng();
56
+ }
57
+ };
58
+
59
+ template<typename Rng, typename RngResult> // Packet16i output assembled from a scalar full-bit engine.
60
+ struct RawbitsMaker<Packet16i, Rng, RngResult, Rand::RandomEngineType::scalar_fullbit>
61
+ {
62
+ EIGEN_STRONG_INLINE Packet16i rawbits(Rng& rng) // Fills 512 bits with 8 or 16 scalar draws, depending on the engine's result size.
63
+ {
64
+ if (sizeof(decltype(rng())) == 8) // 8-byte results: eight draws.
65
+ {
66
+ return _mm512_set_epi64(rng(), rng(), rng(), rng(),
67
+ rng(), rng(), rng(), rng()); // NOTE(review): argument evaluation order is unspecified, so the draw-to-lane mapping may differ between compilers -- confirm.
68
+ }
69
+ else // Otherwise: sixteen draws, one per 32-bit lane.
70
+ {
71
+ return _mm512_set_epi32(rng(), rng(), rng(), rng(),
72
+ rng(), rng(), rng(), rng(),
73
+ rng(), rng(), rng(), rng(),
74
+ rng(), rng(), rng(), rng());
75
+ }
76
+ }
77
+
78
+ EIGEN_STRONG_INLINE Packet16i rawbits_34(Rng& rng) // Delegates to rawbits.
79
+ {
80
+ return rawbits(rng);
81
+ }
82
+
83
+ EIGEN_STRONG_INLINE Packet8i rawbits_half(Rng& rng) // Fills 256 bits with 4 or 8 scalar draws, depending on the engine's result size.
84
+ {
85
+ if (sizeof(decltype(rng())) == 8) // 8-byte results: four draws.
86
+ {
87
+ return _mm256_set_epi64x(rng(), rng(), rng(), rng());
88
+ }
89
+ else // Otherwise: eight draws, one per 32-bit lane.
90
+ {
91
+ return _mm256_set_epi32(rng(), rng(), rng(), rng(),
92
+ rng(), rng(), rng(), rng());
93
+ }
94
+ }
95
+ };
96
+
97
+ template<typename Rng> // Packet16i output from a packet engine whose result type is already Packet16i.
98
+ struct RawbitsMaker<Packet16i, Rng, Packet16i, Rand::RandomEngineType::packet>
99
+ {
100
+ EIGEN_STRONG_INLINE Packet16i rawbits(Rng& rng) // Passes the engine's packet through unchanged.
101
+ {
102
+ return rng();
103
+ }
104
+
105
+ EIGEN_STRONG_INLINE Packet16i rawbits_34(Rng& rng) // Same as rawbits in this specialization.
106
+ {
107
+ return rng();
108
+ }
109
+
110
+ EIGEN_STRONG_INLINE Packet8i rawbits_half(Rng& rng) // Half packet obtained from the engine's half() member.
111
+ {
112
+ return rng.half();
113
+ }
114
+ };
115
+
116
+ template<typename Rng> // Uniform float generation for Packet16f, built on the Packet16i raw-bit maker.
117
+ struct UniformRealUtils<Packet16f, Rng> : public RawbitsMaker<Packet16i, Rng>
118
+ {
119
+ EIGEN_STRONG_INLINE Packet16f zero_to_one(Rng& rng) // Clears the top bit of each 32-bit lane, converts to float, divides by 0x7FFFFFFF.
120
+ {
121
+ return pdiv(_mm512_cvtepi32_ps(pand(this->rawbits(rng), pset1<Packet16i>(0x7FFFFFFF))),
122
+ pset1<Packet16f>(0x7FFFFFFF));
123
+ }
124
+
125
+ EIGEN_STRONG_INLINE Packet16f uniform_real(Rng& rng) // Maps rawbits_34 output through bit_to_ur_float (defined outside this hunk).
126
+ {
127
+ return bit_to_ur_float(this->rawbits_34(rng));
128
+ }
129
+ };
130
+
131
+ template<typename Rng> // Uniform double generation for Packet8d, built on the Packet16i raw-bit maker.
132
+ struct UniformRealUtils<Packet8d, Rng> : public RawbitsMaker<Packet16i, Rng>
133
+ {
134
+ EIGEN_STRONG_INLINE Packet8d zero_to_one(Rng& rng) // Uses a half packet (8 x 32-bit): clears the top bit, converts to double, divides by 0x7FFFFFFF.
135
+ {
136
+ return pdiv(_mm512_cvtepi32_pd(pand(this->rawbits_half(rng), pset1<Packet8i>(0x7FFFFFFF))),
137
+ pset1<Packet8d>(0x7FFFFFFF));
138
+ }
139
+
140
+ EIGEN_STRONG_INLINE Packet8d uniform_real(Rng& rng) // Maps a full rawbits packet through bit_to_ur_double (defined outside this hunk).
141
+ {
142
+ return bit_to_ur_double(this->rawbits(rng));
143
+ }
144
+ };
145
+ }
146
+ }
147
+ #endif
@@ -2,10 +2,10 @@
2
2
  * @file MorePacketMath.h
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.1
6
- * @date 2022-08-13
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
7
  *
8
- * @copyright Copyright (c) 2020-2021
8
+ * @copyright Copyright (c) 2020-2024
9
9
  *
10
10
  */
11
11
 
@@ -46,6 +46,9 @@ namespace Eigen
46
46
  template<>
47
47
  struct IsFloatPacket<Packet4f> : std::true_type {};
48
48
 
49
+ template<> // Trait specialization: flags Packet2d as a double packet (inherits std::true_type).
50
+ struct IsDoublePacket<Packet2d> : std::true_type {};
51
+
49
52
  template<>
50
53
  struct HalfPacket<Packet4i>
51
54
  {
@@ -64,6 +67,11 @@ namespace Eigen
64
67
  {
65
68
  return x;
66
69
  }
70
+
71
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x) // Reinterprets the int32 vector as a float64 vector.
72
+ {
73
+ return (Packet2d)vreinterpretq_f64_s32(x);
74
+ }
67
75
  };
68
76
 
69
77
  template<>
@@ -78,6 +86,30 @@ namespace Eigen
78
86
  {
79
87
  return (Packet4i)vreinterpretq_s32_f32(x);
80
88
  }
89
+
90
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x) // Reinterprets the float32 vector as a float64 vector.
91
+ {
92
+ return (Packet2d)vreinterpretq_f64_f32(x);
93
+ }
94
+ };
95
+
96
+ template<> // Reinterprets a Packet2d as a float, double or int packet via vreinterpretq intrinsics.
97
+ struct reinterpreter<Packet2d>
98
+ {
99
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x) // View x as a float32 vector.
100
+ {
101
+ return vreinterpretq_f32_f64(x);
102
+ }
103
+
104
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x) // Already a double packet: returned unchanged.
105
+ {
106
+ return x;
107
+ }
108
+
109
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x) // View x as an int32 vector.
110
+ {
111
+ return vreinterpretq_s32_f64(x);
112
+ }
81
113
  };
82
114
 
83
115
  template<>
@@ -192,6 +224,18 @@ namespace Eigen
192
224
  return vreinterpretq_f32_u32(vcleq_f32(a, b));
193
225
  }
194
226
 
227
+ template<> // Lane-wise a < b on doubles; the u64 comparison result is reinterpreted as a Packet2d.
228
+ EIGEN_STRONG_INLINE Packet2d pcmplt<Packet2d>(const Packet2d& a, const Packet2d& b)
229
+ {
230
+ return vreinterpretq_f64_u64(vcltq_f64(a,b));
231
+ }
232
+
233
+ template<> // Lane-wise a <= b on doubles; the u64 comparison result is reinterpreted as a Packet2d.
234
+ EIGEN_STRONG_INLINE Packet2d pcmple<Packet2d>(const Packet2d& a, const Packet2d& b)
235
+ {
236
+ return vreinterpretq_f64_u64(vcleq_f64(a,b));
237
+ }
238
+
195
239
  template<>
196
240
  EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
197
241
  {
@@ -210,6 +254,18 @@ namespace Eigen
210
254
  return vbslq_s32(vreinterpretq_u32_s32(ifPacket), thenPacket, elsePacket);
211
255
  }
212
256
 
257
+ template<> // Bitwise select on doubles: ifPacket's bits choose between thenPacket and elsePacket.
258
+ EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
259
+ {
260
+ return vbslq_f64(vreinterpretq_u64_f64(ifPacket), thenPacket, elsePacket);
261
+ }
262
+
263
+ template<> // Same bitwise select on doubles, with the condition supplied as an int32 vector.
264
+ EIGEN_STRONG_INLINE Packet2d pblendv(const Packet4i& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
265
+ {
266
+ return vbslq_f64(vreinterpretq_u64_s32(ifPacket), thenPacket, elsePacket);
267
+ }
268
+
213
269
  template<>
214
270
  EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
215
271
  {
@@ -256,6 +312,37 @@ namespace Eigen
256
312
  {
257
313
  return vrndq_f32(a);
258
314
  }
315
+
316
+ template<> // Converts two doubles to two 64-bit integers, returned under the Packet4i type.
317
+ EIGEN_STRONG_INLINE Packet4i pcast64<Packet2d, Packet4i>(const Packet2d& a)
318
+ {
319
+ return (Packet4i)vcvtq_s64_f64(a);
320
+ }
321
+
322
+ template<> // Converts the packet's two 64-bit integer lanes to two doubles.
323
+ EIGEN_STRONG_INLINE Packet2d pcast64<Packet4i, Packet2d>(const Packet4i& a)
324
+ {
325
+ return vcvtq_f64_s64((int64x2_t)a);
326
+ }
327
+
328
+
329
+ template<> // Lane-wise addition treating both packets as two 64-bit integer lanes.
330
+ EIGEN_STRONG_INLINE Packet4i padd64<Packet4i>(const Packet4i& a, const Packet4i& b)
331
+ {
332
+ return (Packet4i)vaddq_s64((int64x2_t)a, (int64x2_t)b);
333
+ }
334
+
335
+ template<> // Lane-wise subtraction (a - b) treating both packets as two 64-bit integer lanes.
336
+ EIGEN_STRONG_INLINE Packet4i psub64<Packet4i>(const Packet4i& a, const Packet4i& b)
337
+ {
338
+ return (Packet4i)vsubq_s64((int64x2_t)a, (int64x2_t)b);
339
+ }
340
+
341
+ template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED // psin for Packet2d; delegates to _psin, which is defined outside this hunk.
342
+ Packet2d psin<Packet2d>(const Packet2d& x)
343
+ {
344
+ return _psin(x);
345
+ }
259
346
 
260
347
  template<>
261
348
  EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
@@ -279,7 +366,35 @@ namespace Eigen
279
366
  return vreinterpretq_s32_u64(vld1q_u64(u));
280
367
  }
281
368
 
369
+ template<> // Reduces a 4-lane mask to one bool: ANDs the low and high halves, then takes the pairwise minimum.
370
+ EIGEN_STRONG_INLINE bool predux_all(const Packet4f& x)
371
+ {
372
+ uint32x2_t tmp = vand_u32(vget_low_u32( vreinterpretq_u32_f32(x)),
373
+ vget_high_u32(vreinterpretq_u32_f32(x)));
374
+ return vget_lane_u32(vpmin_u32(tmp, tmp), 0); // Converted to bool, i.e. tested for non-zero. NOTE(review): presumably lanes are all-ones or all-zero masks -- confirm.
375
+ }
376
+
377
+ template<> // Integer overload: reinterprets x as Packet4f and reuses the float version.
378
+ EIGEN_STRONG_INLINE bool predux_all(const Packet4i& x)
379
+ {
380
+ return predux_all((Packet4f)vreinterpretq_f32_s32(x));
381
+ }
382
+
282
383
  #ifdef EIGENRAND_EIGEN_33_MODE
384
+ template<> // Reduces a 4-lane mask to one bool: ORs the low and high halves, then takes the pairwise maximum.
385
+ EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
386
+ {
387
+ uint32x2_t tmp = vorr_u32(vget_low_u32( vreinterpretq_u32_f32(x)),
388
+ vget_high_u32(vreinterpretq_u32_f32(x)));
389
+ return vget_lane_u32(vpmax_u32(tmp, tmp), 0); // Converted to bool: true when any bit in any lane is set.
390
+ }
391
+
392
+ template<> // Integer overload: reinterprets x as Packet4f and reuses the float version.
393
+ EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x)
394
+ {
395
+ return predux_any((Packet4f)vreinterpretq_f32_s32(x));
396
+ }
397
+
283
398
  template<>
284
399
  EIGEN_STRONG_INLINE Packet4f plog<Packet4f>(const Packet4f& _x)
285
400
  {