tomoto 0.6.0 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +9 -0
  3. data/LICENSE.txt +1 -1
  4. data/ext/tomoto/ct.cpp +1 -1
  5. data/ext/tomoto/dmr.cpp +1 -1
  6. data/ext/tomoto/dt.cpp +1 -1
  7. data/ext/tomoto/extconf.rb +4 -8
  8. data/ext/tomoto/gdmr.cpp +1 -1
  9. data/ext/tomoto/hdp.cpp +1 -1
  10. data/ext/tomoto/hlda.cpp +1 -1
  11. data/ext/tomoto/hpa.cpp +1 -1
  12. data/ext/tomoto/lda.cpp +29 -3
  13. data/ext/tomoto/llda.cpp +1 -1
  14. data/ext/tomoto/mglda.cpp +1 -1
  15. data/ext/tomoto/pa.cpp +1 -1
  16. data/ext/tomoto/plda.cpp +1 -1
  17. data/ext/tomoto/slda.cpp +1 -1
  18. data/lib/tomoto/lda.rb +1 -0
  19. data/lib/tomoto/version.rb +1 -1
  20. data/vendor/EigenRand/EigenRand/Core.h +6 -4
  21. data/vendor/EigenRand/EigenRand/CwiseHeteroBinaryOp.h +265 -0
  22. data/vendor/EigenRand/EigenRand/Dists/Basic.h +345 -12
  23. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +381 -7
  24. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +4 -4
  25. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +576 -4
  26. data/vendor/EigenRand/EigenRand/EigenRand +4 -4
  27. data/vendor/EigenRand/EigenRand/Macro.h +3 -3
  28. data/vendor/EigenRand/EigenRand/MorePacketMath.h +31 -30
  29. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +41 -29
  30. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +19 -7
  31. data/vendor/EigenRand/EigenRand/PacketFilter.h +8 -5
  32. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +3 -3
  33. data/vendor/EigenRand/EigenRand/RandUtils.h +180 -5
  34. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +42 -3
  35. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +3 -3
  36. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +3 -3
  37. data/vendor/EigenRand/EigenRand/arch/AVX512/MorePacketMath.h +312 -0
  38. data/vendor/EigenRand/EigenRand/arch/AVX512/PacketFilter.h +79 -0
  39. data/vendor/EigenRand/EigenRand/arch/AVX512/RandUtils.h +147 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +118 -3
  41. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +3 -3
  42. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +21 -3
  43. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +32 -4
  44. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +3 -3
  45. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +3 -3
  46. data/vendor/EigenRand/EigenRand/doc.h +108 -157
  47. data/vendor/EigenRand/README.md +60 -272
  48. data/vendor/tomotopy/README.kr.rst +27 -5
  49. data/vendor/tomotopy/README.rst +27 -5
  50. data/vendor/tomotopy/README_pypi.rst +583 -0
  51. data/vendor/tomotopy/licenses_bundled/EigenRand +21 -0
  52. data/vendor/tomotopy/src/TopicModel/CT.h +1 -1
  53. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +1 -1
  54. data/vendor/tomotopy/src/TopicModel/DMR.h +1 -1
  55. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +1 -1
  56. data/vendor/tomotopy/src/TopicModel/DT.h +1 -1
  57. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +1 -1
  58. data/vendor/tomotopy/src/TopicModel/GDMR.h +1 -1
  59. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +1 -1
  60. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -1
  61. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +1 -1
  62. data/vendor/tomotopy/src/TopicModel/HLDA.h +1 -1
  63. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +1 -1
  64. data/vendor/tomotopy/src/TopicModel/HPA.h +1 -1
  65. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +1 -1
  66. data/vendor/tomotopy/src/TopicModel/LDA.h +1 -1
  67. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +1 -1
  68. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +6 -6
  69. data/vendor/tomotopy/src/TopicModel/LLDA.h +1 -1
  70. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +1 -1
  71. data/vendor/tomotopy/src/TopicModel/MGLDA.h +1 -1
  72. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +1 -1
  73. data/vendor/tomotopy/src/TopicModel/PA.h +1 -1
  74. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +1 -1
  75. data/vendor/tomotopy/src/TopicModel/PLDA.h +1 -1
  76. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +1 -1
  77. data/vendor/tomotopy/src/TopicModel/PT.h +1 -1
  78. data/vendor/tomotopy/src/TopicModel/PTModel.cpp +1 -1
  79. data/vendor/tomotopy/src/TopicModel/SLDA.h +1 -1
  80. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +1 -1
  81. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +6 -6
  82. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +41 -0
  83. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +6 -6
  84. data/vendor/tomotopy/src/Utils/Utils.hpp +3 -3
  85. data/vendor/tomotopy/src/Utils/avx512_gamma.h +46 -0
  86. data/vendor/tomotopy/src/Utils/avx512_mathfun.h +99 -0
  87. metadata +10 -9
  88. data/vendor/variant/LICENSE +0 -25
  89. data/vendor/variant/LICENSE_1_0.txt +0 -23
  90. data/vendor/variant/README.md +0 -102
  91. data/vendor/variant/include/mapbox/optional.hpp +0 -74
  92. data/vendor/variant/include/mapbox/recursive_wrapper.hpp +0 -122
  93. data/vendor/variant/include/mapbox/variant.hpp +0 -974
  94. data/vendor/variant/include/mapbox/variant_io.hpp +0 -45
@@ -2,10 +2,10 @@
2
2
  * @file PacketFilter.h
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.1
6
- * @date 2022-08-13
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
7
  *
8
- * @copyright Copyright (c) 2020-2021
8
+ * @copyright Copyright (c) 2020-2024
9
9
  *
10
10
  */
11
11
 
@@ -2,10 +2,10 @@
2
2
  * @file RandUtils.h
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.1
6
- * @date 2022-08-13
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
7
  *
8
- * @copyright Copyright (c) 2020-2021
8
+ * @copyright Copyright (c) 2020-2024
9
9
  *
10
10
  */
11
11
 
@@ -116,11 +116,29 @@ namespace Eigen
116
116
  }
117
117
  };
118
118
 
119
+ #if EIGEN_ARCH_ARM64 && !EIGEN_APPLE_DOUBLE_NEON_BUG
120
+ template<typename Rng>
121
+ struct UniformRealUtils<Packet2d, Rng> : public RawbitsMaker<Packet4i, Rng>
122
+ {
123
+ EIGEN_STRONG_INLINE Packet2d zero_to_one(Rng& rng)
124
+ {
125
+ return pdiv((Packet2d)vcvtq_f64_s64(vreinterpretq_s64_s32(pand(this->rawbits(rng), vreinterpretq_s32_s64(vdupq_n_s64(0x7FFFFFFF))))),
126
+ pset1<Packet2d>(0x7FFFFFFF));
127
+ }
128
+
129
+ EIGEN_STRONG_INLINE Packet2d uniform_real(Rng& rng)
130
+ {
131
+ return bit_to_ur_double(this->rawbits(rng));
132
+ }
133
+ };
134
+
135
+ #else
119
136
  template<typename Gen, typename Urng, bool _mutable>
120
137
  struct functor_traits<scalar_rng_adaptor<Gen, double, Urng, _mutable> >
121
138
  {
122
139
  enum { Cost = HugeCost, PacketAccess = 0, IsRepeatable = false };
123
140
  };
141
+ #endif
124
142
  }
125
143
  }
126
144
  #endif
@@ -2,10 +2,10 @@
2
2
  * @file MorePacketMath.h
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.1
6
- * @date 2022-08-13
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
7
  *
8
- * @copyright Copyright (c) 2020-2021
8
+ * @copyright Copyright (c) 2020-2024
9
9
  *
10
10
  */
11
11
 
@@ -145,6 +145,12 @@ namespace Eigen
145
145
  return _mm_cmpeq_epi32(a, b);
146
146
  }
147
147
 
148
+ template<>
149
+ EIGEN_STRONG_INLINE Packet4f pcmpeq<Packet4f>(const Packet4f& a, const Packet4f& b)
150
+ {
151
+ return _mm_cmpeq_ps(a, b);
152
+ }
153
+
148
154
  template<>
149
155
  struct BitShifter<Packet4i>
150
156
  {
@@ -414,7 +420,29 @@ namespace Eigen
414
420
  {
415
421
  return _psin(x);
416
422
  }
423
+
424
+ template<> EIGEN_STRONG_INLINE bool predux_all(const Packet4f& x)
425
+ {
426
+ return _mm_movemask_ps(x) == 0x0F;
427
+ }
428
+
429
+ template<> EIGEN_STRONG_INLINE bool predux_all(const Packet4i& x)
430
+ {
431
+ return predux_all(_mm_castsi128_ps(x));
432
+ }
433
+
417
434
  #ifdef EIGENRAND_EIGEN_33_MODE
435
+
436
+ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4f& x)
437
+ {
438
+ return !!_mm_movemask_ps(x);
439
+ }
440
+
441
+ template<> EIGEN_STRONG_INLINE bool predux_any(const Packet4i& x)
442
+ {
443
+ return predux_any(_mm_castsi128_ps(x));
444
+ }
445
+
418
446
  template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
419
447
  Packet2d plog<Packet2d>(const Packet2d& _x)
420
448
  {
@@ -494,7 +522,7 @@ namespace Eigen
494
522
  // negative arg will be NAN, 0 will be -INF
495
523
  return pblendv(iszero_mask, minus_inf, _mm_or_pd(x, invalid_mask));
496
524
  }
497
- #endif
525
+ #endif
498
526
  }
499
527
  }
500
528
 
@@ -2,10 +2,10 @@
2
2
  * @file PacketFilter.h
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.1
6
- * @date 2022-08-13
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
7
  *
8
- * @copyright Copyright (c) 2020-2021
8
+ * @copyright Copyright (c) 2020-2024
9
9
  *
10
10
  */
11
11
 
@@ -2,10 +2,10 @@
2
2
  * @file RandUtils.h
3
3
  * @author bab2min (bab2min@gmail.com)
4
4
  * @brief
5
- * @version 0.4.1
6
- * @date 2022-08-13
5
+ * @version 0.5.1
6
+ * @date 2024-09-08
7
7
  *
8
- * @copyright Copyright (c) 2020-2021
8
+ * @copyright Copyright (c) 2020-2024
9
9
  *
10
10
  */
11
11
 
@@ -12,7 +12,7 @@
12
12
  You can get 5~10 times speed by just replacing old Eigen's Random
13
13
  or unvectorizable c++11 random number generators with EigenRand.
14
14
 
15
- EigenRand currently supports only x86-64 architecture (SSE, AVX, AVX2) and ARM64 NEON (experimental).
15
+ EigenRand currently supports only x86-64 architecture (SSE, AVX, AVX2) and ARM64 NEON.
16
16
 
17
17
  EigenRand is distributed under the MIT License.
18
18
 
@@ -85,7 +85,51 @@
85
85
 
86
86
  Every random distribution function has its corresponding -Like function.
87
87
 
88
- @section getting_started_4 Efficient Reusable Generator
88
+ @section getting_started_4 Vectorization over Parameters
89
+ EigenRand's random number generators typically accept scalar parameters.
90
+ However, certain generators can generate random numbers efficiently for an array of parameters in an element-wise manner.
91
+ You can see the full list of distributions which support the vectorization over parameters at @link list_of_supported_distribution @endlink.
92
+
93
+ @code
94
+ #include <iostream>
95
+ #include <Eigen/Dense>
96
+ #include <EigenRand/EigenRand>
97
+
98
+ using namespace Eigen;
99
+
100
+ int main()
101
+ {
102
+ Rand::P8_mt19937_64 urng{ 42 };
103
+
104
+ ArrayXf a{ 10 }, b{ 10 }, c{ 10 };
105
+ a << 1, 2, 3, 4, 5, 6, 7, 8, 9, 10;
106
+ b << 10, 12, 14, 16, 18, 20, 22, 24, 26, 28;
107
+
108
+ // You can use two array parameters.
109
+ // The shape of two parameters should be equal in this case.
110
+ c = Rand::uniformReal(urng, a, b);
111
+ std::cout << c << std::endl;
112
+ // c[0] is generated in the range [a[0], b[0]),
113
+ // c[1] is generated in the range [a[1], b[1]) ...
114
+
115
+ // Or you can provide one parameter as a scalar
116
+ // In this case, a scalar parameter is broadcast to the shape of the array parameter.
117
+ c = Rand::uniformReal(urng, -5, b);
118
+ std::cout << c << std::endl;
119
+ // c[0] is generated in the range [-5, b[0]),
120
+ // c[1] is generated in the range [-5, b[1]) ...
121
+
122
+ c = Rand::uniformReal(urng, a, 11);
123
+ std::cout << c << std::endl;
124
+ // c[0] is generated in the range [a[0], 11),
125
+ // c[1] is generated in the range [a[1], 11) ...
126
+ return 0;
127
+ }
128
+ @endcode
129
+
130
+
131
+
132
+ @section getting_started_5 Efficient Reusable Generator
89
133
  In the example above, functions such as `Eigen::Rand::balancedLike`, `Eigen::Rand::normal` and so on create a generator internally each time they are called.
90
134
  If you want to generate random matrices from the same distribution, consider using Generator classes as follows:
91
135
 
@@ -114,7 +158,7 @@
114
158
  }
115
159
  @endcode
116
160
 
117
- @section getting_started_5 Drawing samples from Multivariate Distribution
161
+ @section getting_started_6 Drawing samples from Multivariate Distribution
118
162
  EigenRand provides generators for some multivariate distributions.
119
163
 
120
164
  @code
@@ -169,33 +213,38 @@
169
213
  *
170
214
  @section list_of_supported_distribution_1 Random Distributions for Real types
171
215
 
172
- | Function | Generator | Scalar Type | Description | Equivalent to |
173
- |:---:|:---:|:---:|:---:|:---:|
174
- | `Eigen::Rand::balanced` | `Eigen::Rand::BalancedGen` | float, double | generates real values in the [-1, 1] range | `Eigen::DenseBase<Ty>::Random` for floating point types |
175
- | `Eigen::Rand::beta` | `Eigen::Rand::BetaGen` | float, double | generates real values on a [beta distribution](https://en.wikipedia.org/wiki/Beta_distribution) | |
176
- | `Eigen::Rand::cauchy` | `Eigen::Rand::CauchyGen` | float, double | generates real values on the [Cauchy distribution](https://en.wikipedia.org/wiki/Cauchy_distribution). | `std::cauchy_distribution` |
177
- | `Eigen::Rand::chiSquared` | `Eigen::Rand::ChiSquaredGen` | float, double | generates real values on a [chi-squared distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution). | `std::chi_squared_distribution` |
178
- | `Eigen::Rand::exponential` | `Eigen::Rand::ExponentialGen` | float, double | generates real values on an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). | `std::exponential_distribution` |
179
- | `Eigen::Rand::extremeValue` | `Eigen::Rand::ExtremeValueGen` | float, double | generates real values on an [extreme value distribution](https://en.wikipedia.org/wiki/Generalized_extreme_value_distribution). | `std::extreme_value_distribution` |
180
- | `Eigen::Rand::fisherF` | `Eigen::Rand::FisherFGen` | float, double | generates real values on the [Fisher's F distribution](https://en.wikipedia.org/wiki/F_distribution). | `std::fisher_f_distribution` |
181
- | `Eigen::Rand::gamma` | `Eigen::Rand::GammaGen` | float, double | generates real values on a [gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution). | `std::gamma_distribution` |
182
- | `Eigen::Rand::lognormal` | `Eigen::Rand::LognormalGen` | float, double | generates real values on a [lognormal distribution](https://en.wikipedia.org/wiki/Lognormal_distribution). | `std::lognormal_distribution` |
183
- | `Eigen::Rand::normal` | `Eigen::Rand::StdNormalGen`, `Eigen::Rand::NormalGen` | float, double | generates real values on a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). | `std::normal_distribution` |
184
- | `Eigen::Rand::studentT` | `Eigen::Rand::StudentTGen` | float, double | generates real values on the [Student's t distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). | `std::student_t_distribution` |
185
- | `Eigen::Rand::uniformReal` | `Eigen::Rand::StdUniformRealGen`, `Eigen::Rand::UniformRealGen` | float, double | generates real values in the `[-1, 0)` range. | `std::generate_canonical` |
186
- | `Eigen::Rand::weibull` | `Eigen::Rand::WeibullGen` | float, double | generates real values on the [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution). | `std::weibull_distribution` |
216
+ | Function | Generator | Scalar Type | VoP | Description | Equivalent to |
217
+ |:---:|:---:|:---:|:---:|:---:|:---:|
218
+ | `Eigen::Rand::balanced` | `Eigen::Rand::BalancedGen` | float, double | yes | generates real values in the [-1, 1] range | `Eigen::DenseBase<Ty>::Random` for floating point types |
219
+ | `Eigen::Rand::beta` | `Eigen::Rand::BetaGen` | float, double | | generates real values on a [beta distribution](https://en.wikipedia.org/wiki/Beta_distribution) | |
220
+ | `Eigen::Rand::cauchy` | `Eigen::Rand::CauchyGen` | float, double | yes | generates real values on the [Cauchy distribution](https://en.wikipedia.org/wiki/Cauchy_distribution). | `std::cauchy_distribution` |
221
+ | `Eigen::Rand::chiSquared` | `Eigen::Rand::ChiSquaredGen` | float, double | | generates real values on a [chi-squared distribution](https://en.wikipedia.org/wiki/Chi-squared_distribution). | `std::chi_squared_distribution` |
222
+ | `Eigen::Rand::exponential` | `Eigen::Rand::ExponentialGen` | float, double | yes | generates real values on an [exponential distribution](https://en.wikipedia.org/wiki/Exponential_distribution). | `std::exponential_distribution` |
223
+ | `Eigen::Rand::extremeValue` | `Eigen::Rand::ExtremeValueGen` | float, double | yes | generates real values on an [extreme value distribution](https://en.wikipedia.org/wiki/Generalized_extreme_value_distribution). | `std::extreme_value_distribution` |
224
+ | `Eigen::Rand::fisherF` | `Eigen::Rand::FisherFGen` | float, double | | generates real values on the [Fisher's F distribution](https://en.wikipedia.org/wiki/F_distribution). | `std::fisher_f_distribution` |
225
+ | `Eigen::Rand::gamma` | `Eigen::Rand::GammaGen` | float, double | | generates real values on a [gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution). | `std::gamma_distribution` |
226
+ | `Eigen::Rand::lognormal` | `Eigen::Rand::LognormalGen` | float, double | yes | generates real values on a [lognormal distribution](https://en.wikipedia.org/wiki/Lognormal_distribution). | `std::lognormal_distribution` |
227
+ | `Eigen::Rand::normal` | `Eigen::Rand::StdNormalGen`, `Eigen::Rand::NormalGen` | float, double | yes | generates real values on a [normal distribution](https://en.wikipedia.org/wiki/Normal_distribution). | `std::normal_distribution` |
228
+ | `Eigen::Rand::studentT` | `Eigen::Rand::StudentTGen` | float, double | yes | generates real values on the [Student's t distribution](https://en.wikipedia.org/wiki/Student%27s_t-distribution). | `std::student_t_distribution` |
229
+ | `Eigen::Rand::uniformReal` | `Eigen::Rand::StdUniformRealGen`, `Eigen::Rand::UniformRealGen` | float, double | yes | generates real values in the `[0, 1)` range. | `std::generate_canonical` |
230
+ | `Eigen::Rand::weibull` | `Eigen::Rand::WeibullGen` | float, double | yes | generates real values on the [Weibull distribution](https://en.wikipedia.org/wiki/Weibull_distribution). | `std::weibull_distribution` |
231
+
232
+ * VoP indicates 'Vectorization over Parameters'.
187
233
 
188
234
  @section list_of_supported_distribution_2 Random Distributions for Integer Types
189
235
 
190
- | Function | Generator | Scalar Type | Description | Equivalent to |
191
- |:---:|:---:|:---:|:---:|:---:|
192
- | `Eigen::Rand::binomial` | `Eigen::Rand::BinomialGen` | int | generates integers on a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). | `std::binomial_distribution` |
193
- | `Eigen::Rand::discrete` | `Eigen::Rand::DiscreteGen` | int | generates random integers on a discrete distribution. | `std::discrete_distribution` |
194
- | `Eigen::Rand::geometric` | `Eigen::Rand::GeometricGen` | int | generates integers on a [geometric distribution](https://en.wikipedia.org/wiki/Geometric_distribution). | `std::geometric_distribution` |
195
- | `Eigen::Rand::negativeBinomial` | `Eigen::Rand::NegativeBinomialGen` | int | generates integers on a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). | `std::negative_binomial_distribution` |
196
- | `Eigen::Rand::poisson` | `Eigen::Rand::PoissonGen` | int | generates integers on the [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). | `std::poisson_distribution` |
197
- | `Eigen::Rand::randBits` | `Eigen::Rand::RandbitsGen` | int | generates integers with random bits. | `Eigen::DenseBase<Ty>::Random` for integer types |
198
- | `Eigen::Rand::uniformInt` | `Eigen::Rand::UniformIntGen` | int | generates integers in the `[min, max]` range. | `std::uniform_int_distribution` |
236
+ | Function | Generator | Scalar Type | VoP | Description | Equivalent to |
237
+ |:---:|:---:|:---:|:---:|:---:|:---:|
238
+ | `Eigen::Rand::bernoulli` | `Eigen::Rand::BernoulliGen` | int | yes | generates 0 or 1 on a [Bernoulli distribution](https://en.wikipedia.org/wiki/Bernoulli_distribution). | `std::bernoulli_distribution` |
239
+ | `Eigen::Rand::binomial` | `Eigen::Rand::BinomialGen` | int | yes | generates integers on a [binomial distribution](https://en.wikipedia.org/wiki/Binomial_distribution). | `std::binomial_distribution` |
240
+ | `Eigen::Rand::discrete` | `Eigen::Rand::DiscreteGen` | int | | generates random integers on a discrete distribution. | `std::discrete_distribution` |
241
+ | `Eigen::Rand::geometric` | `Eigen::Rand::GeometricGen` | int | | generates integers on a [geometric distribution](https://en.wikipedia.org/wiki/Geometric_distribution). | `std::geometric_distribution` |
242
+ | `Eigen::Rand::negativeBinomial` | `Eigen::Rand::NegativeBinomialGen` | int | | generates integers on a [negative binomial distribution](https://en.wikipedia.org/wiki/Negative_binomial_distribution). | `std::negative_binomial_distribution` |
243
+ | `Eigen::Rand::poisson` | `Eigen::Rand::PoissonGen` | int | | generates integers on the [Poisson distribution](https://en.wikipedia.org/wiki/Poisson_distribution). | `std::poisson_distribution` |
244
+ | `Eigen::Rand::randBits` | `Eigen::Rand::RandbitsGen` | int | | generates integers with random bits. | `Eigen::DenseBase<Ty>::Random` for integer types |
245
+ | `Eigen::Rand::uniformInt` | `Eigen::Rand::UniformIntGen` | int | | generates integers in the `[min, max]` range. | `std::uniform_int_distribution` |
246
+
247
+ * VoP indicates 'Vectorization over Parameters'.
199
248
 
200
249
  @section list_of_distribution_3 Multivariate Random Distributions
201
250
  | Generator | Description | Equivalent to |
@@ -212,138 +261,40 @@
212
261
  |:---:|:---:|:---:|
213
262
  | `Eigen::Rand::Vmt19937_64` | a vectorized version of Mersenne Twister algorithm. It generates two 64bit random integers simultaneously with SSE2 and four integers with AVX2. | `std::mt19937_64` |
214
263
  | `Eigen::Rand::P8_mt19937_64` | a vectorized version of Mersenne Twister algorithm. Since it generates eight 64bit random integers simultaneously, the random values are the same regardless of architecture. | |
264
+
215
265
  *
216
266
  * @page performance Performance
217
- * The following charts show the relative speed-up of EigenRand compared to Reference(C++ std or Eigen functions). Detailed results are below the charts.
218
-
219
- @section performance_1 Overview of Results at x86-64 Architecture
220
-
221
- \image html perf_no_vect.png
222
-
223
- \image html perf_sse2.png
224
-
225
- \image html perf_avx.png
226
-
227
- \image html perf_avx2.png
228
-
229
- \image html perf_mv_part1.png
230
-
231
- \image html perf_mv_part2.png
232
-
233
- * The following result is a measure of the time in seconds it takes to generate 1M random numbers. It shows the average of 20 times.
234
-
235
- @section performance_2 Overview of Results at ARM64 NEON (experimental)
236
-
237
- \image html perf_neon_v0.3.90.png
238
-
239
- \image html perf_mv_part1_neon_v0.3.90.png
240
-
241
- \image html perf_mv_part2_neon_v0.3.90.png
242
-
243
- * The following result is a measure of the time in seconds it takes to generate 1M random numbers. It shows the average of 20 times.
244
-
245
- @section performance_3 Intel(R) Xeon(R) Platinum 8171M CPU @ 2.60GHz (Ubuntu 16.04, gcc7.5)
246
-
247
- | | C++ std (or Eigen) | EigenRand (No Vect.) | EigenRand (SSE2) | EigenRand (SSSE3) | EigenRand (AVX) | EigenRand (AVX2) |
248
- |---|---:|---:|---:|---:|---:|---:|
249
- | `balanced`* | 9.0 | 5.9 | 1.5 | 1.4 | 1.3 | 0.9 |
250
- | `balanced`(double)* | 8.7 | 6.4 | 3.3 | 2.9 | 1.7 | 1.7 |
251
- | `binomial(20, 0.5)` | 400.8 | 118.5 | 32.7 | 36.6 | 30.0 | 22.7 |
252
- | `binomial(50, 0.01)` | 71.7 | 22.5 | 7.7 | 8.3 | 7.9 | 6.6 |
253
- | `binomial(100, 0.75)` | 340.5 | 454.5 | 91.7 | 111.5 | 106.3 | 86.4 |
254
- | `cauchy` | 36.1 | 54.4 | 6.1 | 7.1 | 4.7 | 3.9 |
255
- | `chiSquared` | 80.5 | 249.5 | 64.6 | 58.0 | 29.4 | 28.8 |
256
- | `discrete`(int32) | - | 14.0 | 2.9 | 2.6 | 2.4 | 1.7 |
257
- | `discrete`(fp32) | - | 21.9 | 4.3 | 4.0 | 3.6 | 3.0 |
258
- | `discrete`(fp64) | 72.4 | 21.4 | 6.9 | 6.5 | 4.9 | 3.7 |
259
- | `exponential` | 31.0 | 25.3 | 5.5 | 5.3 | 3.3 | 2.9 |
260
- | `extremeValue` | 66.0 | 60.1 | 11.9 | 10.7 | 6.5 | 5.8 |
261
- | `fisherF(1, 1)` | 178.1 | 35.1 | 33.2 | 39.3 | 22.9 | 18.7 |
262
- | `fisherF(5, 5)` | 141.8 | 415.2 | 136.47 | 172.4 | 92.4 | 74.9 |
263
- | `gamma(0.2, 1)` | 207.8 | 211.4 | 54.6 | 51.2 | 26.9 | 27.0 |
264
- | `gamma(5, 3)` | 80.9 | 60.0 | 14.3 | 13.3 | 11.4 | 8.0 |
265
- | `gamma(10.5, 1)` | 81.1 | 248.6 | 63.3 | 58.5 | 29.2 | 28.4 |
266
- | `geometric` | 43.0 | 22.4 | 6.7 | 7.4 | 5.8 | |
267
- | `lognormal` | 66.3 | 55.4 | 12.8 | 11.8 | 6.2 | 6.2 |
268
- | `negativeBinomial(10, 0.5)` | 312.0 | 301.4 | 82.9 | 100.6 | 95.3 | 77.9 |
269
- | `negativeBinomial(20, 0.25)` | 483.4 | 575.9 | 125.0 | 158.2 | 148.4 | 119.5 |
270
- | `normal(0, 1)` | 38.1 | 28.5 | 6.8 | 6.2 | 3.8 | 3.7 |
271
- | `normal(2, 3)` | 37.6 | 29.0 | 7.3 | 6.6 | 4.0 | 3.9 |
272
- | `poisson(1)` | 31.8 | 25.2 | 9.8 | 10.8 | 9.7 | 8.2 |
273
- | `poisson(16)` | 231.8 | 274.1 | 66.2 | 80.7 | 74.4 | 64.2 |
274
- | `randBits` | 5.2 | 5.4 | 1.4 | 1.3 | 1.1 | 1.0 |
275
- | `studentT(1)` | 122.7 | 120.1 | 15.3 | 19.2 | 12.6 | 9.4 |
276
- | `studentT(20)` | 102.2 | 111.1 | 15.4 | 19.2 | 12.2 | 9.4 |
277
- | `uniformInt(0~63)` | 22.4 | 4.7 | 1.7 | 1.6 | 1.4 | 1.1 |
278
- | `uniformInt(0~100k)` | 21.8 | 10.1 | 6.2 | 6.7 | 6.6 | 5.4 |
279
- | `uniformReal` | 12.9 | 5.7 | 1.4 | 1.2 | 1.4 | 0.7 |
280
- | `weibull` | 41.0 | 35.8 | 17.7 | 15.5 | 8.5 | 8.5 |
281
-
282
- * Since there is no equivalent class to `balanced` in C++11 std, we used Eigen::DenseBase::Random instead.
283
-
284
- | | C++ std | EigenRand (No Vect.) | EigenRand (SSE2) | EigenRand (SSSE3) | EigenRand (AVX) | EigenRand (AVX2) |
285
- |---|---:|---:|---:|---:|---:|---:|
286
- | Mersenne Twister(int32) | 4.7 | 5.6 | 4.0 | 3.7 | 3.5 | 3.6 |
287
- | Mersenne Twister(int64) | 5.4 | 5.3 | 4.0 | 3.9 | 3.4 | 2.6 |
288
-
289
- | | Python 3.6 + scipy 1.5.2 + numpy 1.19.2 | EigenRand (No Vect.) | EigenRand (SSE2) | EigenRand (SSSE3) | EigenRand (AVX) | EigenRand (AVX2) |
290
- |---|---:|---:|---:|---:|---:|---:|
291
- | `Dirichlet(4)` | 6.47 | 6.60 | 2.39 | 2.49 | 1.34 | 1.67 |
292
- | `Dirichlet(100)` | 75.95 | 189.97 | 66.60 | 72.11 | 38.86 | 34.98 |
293
- | `InvWishart(4)` | 140.18 | 7.62 | 4.21 | 4.54 | 3.58 | 3.39 |
294
- | `InvWishart(50)` | 1510.47 | 1737.4 | 697.39 | 733.69 | 604.59 | 554.006 |
295
- | `Multinomial(4, t=20)` | 3.32 | 4.12 | 0.95 | 1.06 | 1.00 | 1.03 |
296
- | `Multinomial(4, t=1000)` | 3.51 | 192.51 | 35.99 | 39.58 | 27.84 | 35.45 |
297
- | `Multinomial(100, t=20)` | 69.19 | 4.80 | 2.00 | 2.20 | 2.28 | 2.09 |
298
- | `Multinomial(100, t=1000)` | 139.74 | 179.43 | 49.48 | 56.19 | 40.78 | 43.18 |
299
- | `MvNormal(4)` | 2.32 | 0.96 | 0.36 | 0.37 | 0.25 | 0.30 |
300
- | `MvNormal(100)` | 49.09 | 57.18 | 17.17 | 18.51 | 10.82 | 11.03 |
301
- | `Wishart(4)` | 71.19 | 5.28 | 2.70 | 2.93 | 2.04 | 1.94 |
302
- | `Wishart(50)` | 1185.26 | 1360.49 | 492.91 | 517.44 | 359.03 | 324.60 |
303
-
304
- @section performance_4 AMD Ryzen 7 3700x CPU @ 3.60GHz (Windows 10, MSVC2017)
305
-
306
- | | C++ std (or Eigen) | EigenRand (SSE2) | EigenRand (AVX) | EigenRand (AVX2) |
307
- |---|---:|---:|---:|---:|
308
- | `balanced`* | 20.8 | 1.9 | 2.0 | 1.4 |
309
- | `balanced`(double)* | 21.7 | 4.1 | 2.7 | 3.0 |
310
- | `binomial(20, 0.5)` | 416.0 | 27.7 | 28.9 | 29.1 |
311
- | `binomial(50, 0.01)` | 37.8 | 6.3 | 6.0 | 6.6 |
312
- | `binomial(100, 0.75)` | 309.1 | 72.4 | 66.0 | 67.0 |
313
- | `cauchy` | 42.2 | 4.8 | 5.1 | 2.7 |
314
- | `chiSquared` | 153.8 | 33.5 | 21.2 | 17.0 |
315
- | `discrete`(int32) | - | 2.4 | 2.3 | 2.5 |
316
- | `discrete`(fp32) | - | 2.6 | 2.3 | 3.5 |
317
- | `discrete`(fp64) | 55.8 | 5.1 | 4.7 | 4.3 |
318
- | `exponential` | 33.4 | 6.4 | 2.8 | 2.2 |
319
- | `extremeValue` | 39.4 | 7.8 | 4.6 | 4.0 |
320
- | `fisherF(1, 1)` | 103.9 | 25.3 | 14.9 | 11.7 |
321
- | `fisherF(5, 5)` | 295.7 | 85.5 | 58.3 | 44.8 |
322
- | `gamma(0.2, 1)` | 128.8 | 31.9 | 18.3 | 15.8 |
323
- | `gamma(5, 3)` | 156.1 | 9.7 | 8.0 | 5.0 |
324
- | `gamma(10.5, 1)` | 148.5 | 33.1 | 21.1 | 17.2 |
325
- | `geometric` | 27.1 | 6.6 | 4.3 | 4.1 |
326
- | `lognormal` | 104.0 | 6.6 | 4.7 | 3.5 |
327
- | `negativeBinomial(10, 0.5)` | 462.1 | 60.0 | 56.4 | 58.6 |
328
- | `negativeBinomial(20, 0.25)` | 357.6 | 84.5 | 80.6 | 78.4 |
329
- | `normal(0, 1)` | 48.8 | 4.2 | 3.7 | 2.3 |
330
- | `normal(2, 3)` | 48.8 | 4.5 | 3.8 | 2.4 |
331
- | `poisson(1)` | 46.4 | 7.9 | 7.4 | 8.2 |
332
- | `poisson(16)` | 192.4 | 43.2 | 40.4 | 40.9 |
333
- | `randBits` | 4.2 | 1.7 | 1.5 | 1.8 |
334
- | `studentT(1)` | 107.0 | 12.3 | 6.8 | 5.7 |
335
- | `studentT(20)` | 107.1 | 12.3 | 6.8 | 5.8 |
336
- | `uniformInt(0~63)` | 31.2 | 1.1 | 1.0 | 1.2 |
337
- | `uniformInt(0~100k)` | 27.7 | 5.6 | 5.6 | 5.4 |
338
- | `uniformReal` | 30.7 | 1.1 | 1.0 | 0.6 |
339
- | `weibull` | 46.5 | 10.6 | 6.4 | 5.2 |
340
-
341
- * Since there is no equivalent class to `balanced` in C++11 std, we used Eigen::DenseBase::Random instead.
342
-
343
- | | C++ std | EigenRand (SSE2) | EigenRand (AVX) | EigenRand (AVX2) |
344
- |---|---:|---:|---:|---:|
345
- | Mersenne Twister(int32) | 5.0 | 3.4 | 3.4 | 3.3 |
346
- | Mersenne Twister(int64) | 5.1 | 3.9 | 3.9 | 3.3 |
267
+ * The following charts show the relative speed-up of EigenRand compared to references (equivalent functions of C++ std or Eigen for univariate distributions and SciPy for multivariate distributions).
268
+
269
+ Since there is no equivalent class to `balanced` in C++11 std, we used Eigen::DenseBase::Random instead.
270
+
271
+ Cases filled with orange are generators that are slower than reference functions.
272
+
273
+ @section performance_1 Windows 2019, MSVC 19.29.30147, Intel(R) Xeon(R) Platinum 8171M CPU, AVX2, Eigen 3.4.0
274
+
275
+ \image html perf_avx2_win.png width=80%
276
+
277
+ \image html perf_avx2_win_mv1.png width=80%
278
+
279
+ \image html perf_avx2_win_mv2.png width=80%
280
+
281
+ @section performance_2 Ubuntu 18.04, gcc 7.5.0, Intel(R) Xeon(R) Platinum 8370C CPU, AVX2, Eigen 3.4.0
282
+
283
+ \image html perf_avx2_ubu.png width=80%
284
+
285
+ \image html perf_avx2_ubu_mv1.png width=80%
286
+
287
+ \image html perf_avx2_ubu_mv2.png width=80%
288
+
289
+ @section performance_3 macOS Monterey 12.2.1, clang 13.1.6, Apple M1 Pro, NEON, Eigen 3.4.0
290
+
291
+ \image html perf_neon_mac.png width=80%
292
+
293
+ \image html perf_neon_mac_mv1.png width=80%
294
+
295
+ \image html perf_neon_mac_mv2.png width=80%
296
+
297
+ You can see the detailed numerical values used to plot the above charts on the <a href="https://github.com/bab2min/EigenRand/actions/workflows/release.yml" target="_blank">Action Results of GitHub repository</a>.
347
298
 
348
299
  *
349
300
  */