tomoto 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (420)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,19 @@
1
+ /**
2
+ * @file EigenRand
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief Umbrella header: includes Eigen/Dense, then EigenRand/Macro.h and EigenRand/Core.h.
5
+ * @version 0.2.0
6
+ * @date 2020-06-22
7
+ *
8
+ * @copyright Copyright (c) 2020
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_EIGENRAND_H
13
+ #define EIGENRAND_EIGENRAND_H
14
+
15
+ #include <Eigen/Dense>
16
+ #include <EigenRand/Macro.h>
17
+ #include <EigenRand/Core.h>
18
+
19
+ #endif
@@ -0,0 +1,24 @@
1
+ /**
2
+ * @file Macro.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief Defines the EIGENRAND_*_VERSION macros and rejects Eigen versions older than 3.3.7.
5
+ * @version 0.2.1
6
+ * @date 2020-07-11
7
+ *
8
+ * @copyright Copyright (c) 2020
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MACRO_H
13
+ #define EIGENRAND_MACRO_H
14
+
15
+ #define EIGENRAND_WORLD_VERSION 0 // first component of the EigenRand version number
16
+ #define EIGENRAND_MAJOR_VERSION 2 // second component of the EigenRand version number
17
+ #define EIGENRAND_MINOR_VERSION 0 // third component; NOTE(review): the file header says @version 0.2.1 but these macros read 0.2.0 - confirm which is intended
18
+
19
+ #if EIGEN_VERSION_AT_LEAST(3,3,7) // EIGEN_VERSION_AT_LEAST is not defined in this file; presumably an Eigen header must be included first - confirm
20
+ #else // empty "then" branch above: only the failing case does anything
21
+ #error At least Eigen 3.3.7 is required.
22
+ #endif
23
+
24
+ #endif
@@ -0,0 +1,978 @@
1
+ /**
2
+ * @file MorePacketMath.h
3
+ * @author bab2min (bab2min@gmail.com)
4
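+ * @brief Extra packet (SIMD) primitives in Eigen::internal: bit reinterpretation, integer shifts and comparisons, blends, gathers, and sin/cos and lgamma approximations.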
5
+ * @version 0.2.0
6
+ * @date 2020-06-22
7
+ *
8
+ * @copyright Copyright (c) 2020
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MORE_PACKET_MATH_H
13
+ #define EIGENRAND_MORE_PACKET_MATH_H
14
+
15
+ #include <cstdint>
16
+ #include <cstring>
17
+ #include <limits>
18
+ #include <Eigen/Dense>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
+ template<typename Packet>
22
+ struct reinterpreter // primary template has no members; the packet types supported by this file specialize it with to_float / to_double / to_int
23
+ {
24
+ };
25
+
26
+ // Forwards `x` to reinterpreter<Packet>::to_float and returns the resulting float packet.
27
+ template<typename Packet>
28
+ inline auto reinterpret_to_float(const Packet& x)
29
+ -> decltype(reinterpreter<Packet>{}.to_float(x))
30
+ {
31
+ reinterpreter<Packet> caster;
32
+ return caster.to_float(x);
33
+ }
32
+
33
+ // Forwards `x` to reinterpreter<Packet>::to_double and returns the resulting double packet.
34
+ template<typename Packet>
35
+ inline auto reinterpret_to_double(const Packet& x)
36
+ -> decltype(reinterpreter<Packet>{}.to_double(x))
37
+ {
38
+ reinterpreter<Packet> caster;
39
+ return caster.to_double(x);
40
+ }
39
+
40
+ // Forwards `x` to reinterpreter<Packet>::to_int and returns the resulting integer packet.
41
+ template<typename Packet>
42
+ inline auto reinterpret_to_int(const Packet& x)
43
+ -> decltype(reinterpreter<Packet>{}.to_int(x))
44
+ {
45
+ reinterpreter<Packet> caster;
46
+ return caster.to_int(x);
47
+ }
46
+
47
+ template<typename Packet>
48
+ EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
49
+
50
+ template<typename Packet>
51
+ EIGEN_STRONG_INLINE Packet pcmpeq(const Packet& a, const Packet& b);
52
+
53
+ template<typename Packet>
54
+ EIGEN_STRONG_INLINE Packet psll(const Packet& a, int b);
55
+
56
+ template<typename Packet>
57
+ EIGEN_STRONG_INLINE Packet psrl(const Packet& a, int b);
58
+
59
+ template<typename Packet>
60
+ EIGEN_STRONG_INLINE Packet psll64(const Packet& a, int b);
61
+
62
+ template<typename Packet>
63
+ EIGEN_STRONG_INLINE Packet psrl64(const Packet& a, int b);
64
+
65
+ template<typename Packet>
66
+ EIGEN_STRONG_INLINE int pmovemask(const Packet& a);
67
+
68
+ // Scalar form of psll64: logical left shift of one 64-bit value by `b` bits.
69
+ template<>
70
+ EIGEN_STRONG_INLINE uint64_t psll64<uint64_t>(const uint64_t& a, int b)
71
+ {
72
+ const uint64_t shifted = a << b;
73
+ return shifted;
74
+ }
73
+
74
+ // Scalar form of psrl64: logical right shift of one 64-bit value by `b` bits.
75
+ template<>
76
+ EIGEN_STRONG_INLINE uint64_t psrl64<uint64_t>(const uint64_t& a, int b)
77
+ {
78
+ const uint64_t shifted = a >> b;
79
+ return shifted;
80
+ }
79
+
80
+ template<typename Packet>
81
+ EIGEN_STRONG_INLINE void psincos(Packet x, Packet &s, Packet &c) // writes the lane-wise sine of x to s and the cosine of x to c
82
+ {
83
+ Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
84
+ using IntPacket = decltype(reinterpret_to_int(x)); // integer packet holding the same bits as Packet
85
+ IntPacket emm0, emm2, emm4;
86
+
87
+ sign_bit_sin = x;
88
+ /* take the absolute value; the input sign is folded back into the sine at the end */
89
+ x = pabs(x);
90
+ /* extract the sign bit (bit 31 of every 32-bit lane) */
91
+ sign_bit_sin = reinterpret_to_float(
92
+ pand(reinterpret_to_int(sign_bit_sin), pset1<IntPacket>(0x80000000))
93
+ );
94
+
95
+ /* scale by 4/Pi */
96
+ y = pmul(x, pset1<Packet>(1.27323954473516));
97
+
98
+ /* store the integer part of y in emm2 */
99
+ emm2 = pcast<Packet, IntPacket>(y);
100
+
101
+ /* j=(j+1) & (~1) (see the cephes sources): round j up to an even number */
102
+ emm2 = padd(emm2, pset1<IntPacket>(1));
103
+ emm2 = pand(emm2, pset1<IntPacket>(~1));
104
+ y = pcast<IntPacket, Packet>(emm2);
105
+
106
+ emm4 = emm2; // keep a copy of j for the cosine sign computed further down
107
+
108
+ /* get the swap sign flag for the sine: bit 2 of j, moved to bit 31 by the shift of 29 */
109
+ emm0 = pand(emm2, pset1<IntPacket>(4));
110
+ emm0 = psll(emm0, 29);
111
+ Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
112
+
113
+ /* get the polynomial selection mask for the sine: all ones in lanes where (j & 2) == 0 */
114
+ emm2 = pand(emm2, pset1<IntPacket>(2));
115
+
116
+ emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
117
+ Packet poly_mask = reinterpret_to_float(emm2);
118
+
119
+ /* The magic pass: "Extended precision modular arithmetic"
120
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
121
+ xmm1 = pset1<Packet>(-0.78515625);
122
+ xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
123
+ xmm3 = pset1<Packet>(-3.77489497744594108e-8);
124
+ xmm1 = pmul(y, xmm1);
125
+ xmm2 = pmul(y, xmm2);
126
+ xmm3 = pmul(y, xmm3);
127
+ x = padd(x, xmm1);
128
+ x = padd(x, xmm2);
129
+ x = padd(x, xmm3);
130
+
131
+ emm4 = psub(emm4, pset1<IntPacket>(2));
132
+ emm4 = pandnot(emm4, pset1<IntPacket>(4)); // NOTE(review): the result depends on which operand pandnot negates for IntPacket - confirm against the Eigen version in use
133
+ emm4 = psll(emm4, 29);
134
+ Packet sign_bit_cos = reinterpret_to_float(emm4);
135
+ sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
136
+
137
+
138
+ /* Evaluate the first polynomial in z = x*x: the cosine series 1 - z/2 + z*z*(...) */
139
+ Packet z = pmul(x, x);
140
+ y = pset1<Packet>(2.443315711809948E-005);
141
+
142
+ y = pmul(y, z);
143
+ y = padd(y, pset1<Packet>(-1.388731625493765E-003));
144
+ y = pmul(y, z);
145
+ y = padd(y, pset1<Packet>(4.166664568298827E-002));
146
+ y = pmul(y, z);
147
+ y = pmul(y, z);
148
+ Packet tmp = pmul(z, pset1<Packet>(0.5));
149
+ y = psub(y, tmp);
150
+ y = padd(y, pset1<Packet>(1));
151
+
152
+ /* Evaluate the second polynomial: the sine series x + x*z*(...) */
153
+
154
+ Packet y2 = pset1<Packet>(-1.9515295891E-4);
155
+ y2 = pmul(y2, z);
156
+ y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
157
+ y2 = pmul(y2, z);
158
+ y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
159
+ y2 = pmul(y2, z);
160
+ y2 = pmul(y2, x);
161
+ y2 = padd(y2, x);
162
+
163
+ /* select the correct result from the two polynomials using poly_mask */
164
+ xmm3 = poly_mask;
165
+ Packet ysin2 = pand(xmm3, y2);
166
+ Packet ysin1 = pandnot(xmm3, y);
167
+ y2 = psub(y2, ysin2);
168
+ y = psub(y, ysin1);
169
+
170
+ xmm1 = padd(ysin1, ysin2);
171
+ xmm2 = padd(y, y2);
172
+
173
+ /* update the sign */
174
+ s = pxor(xmm1, sign_bit_sin);
175
+ c = pxor(xmm2, sign_bit_cos);
176
+ }
177
+
178
+ // approximation : lgamma(z) ~= (z+2.5)ln(z+3) - z - 3 + 0.5 ln (2pi) + 1/12/(z + 3) - ln (z(z+1)(z+2))
179
+ // Evaluates the lgamma approximation given in the comment above, lane by lane.
180
+ template<typename Packet>
181
+ EIGEN_STRONG_INLINE Packet plgamma(const Packet& x)
182
+ {
183
+ const Packet shifted = padd(x, pset1<Packet>(3));
184
+ // (x + 2.5) * ln(x + 3)
185
+ const Packet leading = pmul(padd(shifted, pset1<Packet>(-0.5)), plog(shifted));
186
+ // 1 / 12 / (x + 3)
187
+ const Packet correction = pdiv(pset1<Packet>(1 / 12.), shifted);
188
+ // x * (x + 1) * (x + 2), multiplied in the order ((x + 2) * (x + 1)) * x
189
+ const Packet product = pmul(
190
+ pmul(psub(shifted, pset1<Packet>(1)), psub(shifted, pset1<Packet>(2))), x);
191
+ Packet acc = psub(leading, shifted);
192
+ // 0.9189385332046727 is 0.5 * ln(2 * pi)
193
+ acc = padd(acc, pset1<Packet>(0.9189385332046727));
194
+ acc = padd(acc, correction);
195
+ return psub(acc, plog(product));
196
+ }
191
+
192
+ template<typename Packet>
193
+ EIGEN_STRONG_INLINE Packet pcmplt(const Packet& a, const Packet& b);
194
+
195
+ template<typename Packet>
196
+ EIGEN_STRONG_INLINE Packet pcmple(const Packet& a, const Packet& b);
197
+
198
+ template<typename Packet>
199
+ EIGEN_STRONG_INLINE Packet pblendv(const Packet& ifPacket, const Packet& thenPacket, const Packet& elsePacket);
200
+
201
+ template<typename Packet>
202
+ EIGEN_STRONG_INLINE Packet pgather(const int* addr, const Packet& index);
203
+
204
+ template<typename Packet>
205
+ EIGEN_STRONG_INLINE auto pgather(const float* addr, const Packet& index) -> decltype(reinterpret_to_float(std::declval<Packet>()));
206
+
207
+ template<typename Packet>
208
+ EIGEN_STRONG_INLINE auto pgather(const double* addr, const Packet& index, bool upperhalf = false) -> decltype(reinterpret_to_double(std::declval<Packet>()));
209
+
210
+ template<typename Packet>
211
+ EIGEN_STRONG_INLINE Packet ptruncate(const Packet& a);
212
+
213
+ template<typename Packet>
214
+ EIGEN_STRONG_INLINE Packet pcmpeq64(const Packet& a, const Packet& b);
215
+
216
+ template<typename Packet>
217
+ EIGEN_STRONG_INLINE Packet pmuluadd64(const Packet& a, uint64_t b, uint64_t c);
218
+
219
+ // Turns random 32-bit lanes into floats in [0, 1): the low 23 bits become the mantissa of a value in [1, 2), then 1 is subtracted.
220
+ template<typename IntPacket>
221
+ EIGEN_STRONG_INLINE auto bit_to_ur_float(const IntPacket& x) -> decltype(reinterpret_to_float(x))
222
+ {
223
+ using FloatPacket = decltype(reinterpret_to_float(x));
224
+ const IntPacket mantissaMask = pset1<IntPacket>(0x7FFFFF);
225
+ const IntPacket exponentOfOne = pset1<IntPacket>(127 << 23);
226
+ const IntPacket bits = por(pand(x, mantissaMask), exponentOfOne);
227
+ return psub(reinterpret_to_float(bits), pset1<FloatPacket>(1));
228
+ }
230
+
231
+ // Turns random 64-bit lanes into doubles in [0, 1): the low 52 bits become the mantissa of a value in [1, 2), then 1 is subtracted.
232
+ template<typename IntPacket>
233
+ EIGEN_STRONG_INLINE auto bit_to_ur_double(const IntPacket& x) -> decltype(reinterpret_to_double(x))
234
+ {
235
+ using DoublePacket = decltype(reinterpret_to_double(x));
236
+ const IntPacket mantissaMask = pseti64<IntPacket>(0xFFFFFFFFFFFFFull);
237
+ const IntPacket exponentOfOne = pseti64<IntPacket>(1023ull << 52);
238
+ const IntPacket bits = por(pand(x, mantissaMask), exponentOfOne);
239
+ return psub(reinterpret_to_double(bits), pset1<DoublePacket>(1));
240
+ }
242
+
243
+ template<typename Scalar>
244
+ struct bit_scalar;
245
+
246
+ // Scalar conversion of 32 random bits into a uniform float.
247
+ template<>
248
+ struct bit_scalar<float>
249
+ {
250
+ // Returns a value in [0, 1): the low 23 bits of x become the mantissa of a float in [1, 2), then 1 is subtracted.
251
+ float to_ur(uint32_t x)
252
+ {
253
+ // Copy the bit pattern with memcpy; reading an inactive union member is undefined behaviour in C++.
254
+ const uint32_t bits = (x & 0x7FFFFF) | (127u << 23);
255
+ float f;
256
+ std::memcpy(&f, &bits, sizeof(f));
257
+ return f - 1.f;
258
+ }
259
+
260
+ // Same as to_ur, offset by epsilon / 8 so the result is never exactly zero.
261
+ float to_nzur(uint32_t x)
262
+ {
263
+ return to_ur(x) + std::numeric_limits<float>::epsilon() / 8;
264
+ }
265
+ };
265
+
266
+ // Scalar conversion of 64 random bits into a uniform double.
267
+ template<>
268
+ struct bit_scalar<double>
269
+ {
270
+ // Returns a value in [0, 1): the low 52 bits of x become the mantissa of a double in [1, 2), then 1 is subtracted.
271
+ double to_ur(uint64_t x)
272
+ {
273
+ // Copy the bit pattern with memcpy; reading an inactive union member is undefined behaviour in C++.
274
+ const uint64_t bits = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
275
+ double f;
276
+ std::memcpy(&f, &bits, sizeof(f));
277
+ return f - 1.;
278
+ }
279
+
280
+ // Same as to_ur, offset by epsilon / 8 so the result is never exactly zero.
281
+ double to_nzur(uint64_t x)
282
+ {
283
+ return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
284
+ }
285
+ };
285
+
286
+
287
+ struct float2 // pair of floats; returned by the uint64_t overload of bit_to_ur_float below
288
+ {
289
+ float f[2];
290
+ };
291
+
292
+ // Converts the two 32-bit halves of x into two floats with bit_scalar<float>::to_ur: f[0] from the low half, f[1] from the high half.
293
+ EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
294
+ {
295
+ bit_scalar<float> conv;
296
+ const uint32_t low = static_cast<uint32_t>(x & 0xFFFFFFFF);
297
+ const uint32_t high = static_cast<uint32_t>(x >> 32);
298
+ float2 pair;
299
+ pair.f[0] = conv.to_ur(low);
300
+ pair.f[1] = conv.to_ur(high);
301
+ return pair;
302
+ }
300
+ }
301
+ }
302
+
303
+ #ifdef EIGEN_VECTORIZE_AVX
304
+ #include <immintrin.h>
305
+
306
+ namespace Eigen
307
+ {
308
+ namespace internal
309
+ {
310
+ // Bit-level casts from a 256-bit integer packet; the cast intrinsics convert no lane values.
311
+ template<>
312
+ struct reinterpreter<Packet8i>
313
+ {
314
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet8i& x) { return _mm256_castsi256_ps(x); }
315
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet8i& x) { return _mm256_castsi256_pd(x); }
316
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet8i& x) { return x; }
317
+ };
328
+
329
+ // Bit-level casts from a 256-bit float packet; the cast intrinsics convert no lane values.
330
+ template<>
331
+ struct reinterpreter<Packet8f>
332
+ {
333
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet8f& x) { return x; }
334
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet8f& x) { return _mm256_castps_pd(x); }
335
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet8f& x) { return _mm256_castps_si256(x); }
336
+ };
347
+
348
+ // Bit-level casts from a 256-bit double packet; the cast intrinsics convert no lane values.
349
+ template<>
350
+ struct reinterpreter<Packet4d>
351
+ {
352
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet4d& x) { return _mm256_castpd_ps(x); }
353
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet4d& x) { return x; }
354
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet4d& x) { return _mm256_castpd_si256(x); }
355
+ };
366
+
367
+ // Splits a 256-bit integer packet: a receives the lower 128 bits, b the upper 128 bits.
368
+ EIGEN_STRONG_INLINE void split_two(const Packet8i& x, Packet4i& a, Packet4i& b)
369
+ {
370
+ const Packet4i lower = _mm256_extractf128_si256(x, 0);
371
+ a = lower;
372
+ const Packet4i upper = _mm256_extractf128_si256(x, 1);
373
+ b = upper;
374
+ }
372
+
373
+ // Builds a 256-bit integer packet with a in the lower 128 bits and b in the upper 128 bits.
374
+ EIGEN_STRONG_INLINE Packet8i combine_two(const Packet4i& a, const Packet4i& b)
375
+ {
376
+ const Packet8i widened = _mm256_castsi128_si256(a);
377
+ return _mm256_insertf128_si256(widened, b, 1);
378
+ }
377
+
378
+ // Splits a 256-bit float packet: a receives the lower 128 bits, b the upper 128 bits.
379
+ EIGEN_STRONG_INLINE void split_two(const Packet8f& x, Packet4f& a, Packet4f& b)
380
+ {
381
+ const Packet4f lower = _mm256_extractf128_ps(x, 0);
382
+ a = lower;
383
+ const Packet4f upper = _mm256_extractf128_ps(x, 1);
384
+ b = upper;
385
+ }
383
+
384
+ // Builds a 256-bit float packet with a in the lower 128 bits and b in the upper 128 bits.
385
+ EIGEN_STRONG_INLINE Packet8f combine_two(const Packet4f& a, const Packet4f& b)
386
+ {
387
+ const Packet8f widened = _mm256_castps128_ps256(a);
388
+ return _mm256_insertf128_ps(widened, b, 1);
389
+ }
388
+
389
+
390
+ EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet8i& a) // packs the even 32-bit lanes (0, 2, 4, 6) of a into one 128-bit packet
391
+ {
392
+ #ifdef EIGEN_VECTORIZE_AVX2
393
+ return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7))); // cross-lane permute puts the even lanes in the low 128 bits
394
+ #else
395
+ auto sc = _mm256_permutevar_ps(_mm256_castsi256_ps(a), _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2)); // permutes within each 128-bit half: low -> [a0,a2,a1,a3], high -> [a5,a7,a4,a6]
396
+ return _mm_castps_si128(_mm_blend_ps(_mm256_extractf128_ps(sc, 0), _mm256_extractf128_ps(sc, 1), 0b1100)); // lanes 0-1 from the low half, lanes 2-3 from the high half -> [a0,a2,a4,a6]
397
+ #endif
398
+ }
399
+
400
+ // Broadcasts the 64-bit value a into all four 64-bit lanes of a 256-bit integer packet.
401
+ template<>
402
+ EIGEN_STRONG_INLINE Packet8i pseti64<Packet8i>(uint64_t a)
403
+ {
404
+ const Packet8i broadcast = _mm256_set1_epi64x(a);
405
+ return broadcast;
406
+ }
405
+
406
+ // 32-bit lane equality mask. Without AVX2 the two 128-bit halves are compared separately and recombined.
407
+ template<>
408
+ EIGEN_STRONG_INLINE Packet8i pcmpeq<Packet8i>(const Packet8i& a, const Packet8i& b)
409
+ {
410
+ #ifdef EIGEN_VECTORIZE_AVX2
411
+ return _mm256_cmpeq_epi32(a, b);
412
+ #else
413
+ Packet4i aLo, aHi, bLo, bHi;
414
+ split_two(a, aLo, aHi);
415
+ split_two(b, bLo, bHi);
416
+ const Packet4i eqLo = _mm_cmpeq_epi32(aLo, bLo);
417
+ const Packet4i eqHi = _mm_cmpeq_epi32(aHi, bHi);
418
+ return combine_two(eqLo, eqHi);
419
+ #endif
420
+ }
418
+
419
+ // Logical left shift of every 32-bit lane by b bits. Without AVX2 the two 128-bit halves are shifted separately.
420
+ template<>
421
+ EIGEN_STRONG_INLINE Packet8i psll<Packet8i>(const Packet8i& a, int b)
422
+ {
423
+ #ifdef EIGEN_VECTORIZE_AVX2
424
+ return _mm256_slli_epi32(a, b);
425
+ #else
426
+ Packet4i lo, hi;
427
+ split_two(a, lo, hi);
428
+ const Packet4i shiftedLo = _mm_slli_epi32(lo, b);
429
+ const Packet4i shiftedHi = _mm_slli_epi32(hi, b);
430
+ return combine_two(shiftedLo, shiftedHi);
431
+ #endif
432
+ }
430
+
431
+ // Logical right shift of every 32-bit lane by b bits. Without AVX2 the two 128-bit halves are shifted separately.
432
+ template<>
433
+ EIGEN_STRONG_INLINE Packet8i psrl<Packet8i>(const Packet8i& a, int b)
434
+ {
435
+ #ifdef EIGEN_VECTORIZE_AVX2
436
+ return _mm256_srli_epi32(a, b);
437
+ #else
438
+ Packet4i lo, hi;
439
+ split_two(a, lo, hi);
440
+ const Packet4i shiftedLo = _mm_srli_epi32(lo, b);
441
+ const Packet4i shiftedHi = _mm_srli_epi32(hi, b);
442
+ return combine_two(shiftedLo, shiftedHi);
443
+ #endif
444
+ }
442
+
443
+ // Logical left shift of every 64-bit lane by b bits. Without AVX2 the two 128-bit halves are shifted separately.
444
+ template<>
445
+ EIGEN_STRONG_INLINE Packet8i psll64<Packet8i>(const Packet8i& a, int b)
446
+ {
447
+ #ifdef EIGEN_VECTORIZE_AVX2
448
+ return _mm256_slli_epi64(a, b);
449
+ #else
450
+ Packet4i lo, hi;
451
+ split_two(a, lo, hi);
452
+ const Packet4i shiftedLo = _mm_slli_epi64(lo, b);
453
+ const Packet4i shiftedHi = _mm_slli_epi64(hi, b);
454
+ return combine_two(shiftedLo, shiftedHi);
455
+ #endif
456
+ }
454
+
455
+ // Logical right shift of every 64-bit lane by b bits. Without AVX2 the two 128-bit halves are shifted separately.
456
+ template<>
457
+ EIGEN_STRONG_INLINE Packet8i psrl64<Packet8i>(const Packet8i& a, int b)
458
+ {
459
+ #ifdef EIGEN_VECTORIZE_AVX2
460
+ return _mm256_srli_epi64(a, b);
461
+ #else
462
+ Packet4i lo, hi;
463
+ split_two(a, lo, hi);
464
+ const Packet4i shiftedLo = _mm_srli_epi64(lo, b);
465
+ const Packet4i shiftedHi = _mm_srli_epi64(hi, b);
466
+ return combine_two(shiftedLo, shiftedHi);
467
+ #endif
468
+ }
466
+
467
+ // Adds the 32-bit lanes of a and b. Without AVX2 the two 128-bit halves are added separately.
468
+ template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b)
469
+ {
470
+ #ifdef EIGEN_VECTORIZE_AVX2
471
+ return _mm256_add_epi32(a, b);
472
+ #else
473
+ Packet4i aLo, aHi, bLo, bHi;
474
+ split_two(a, aLo, aHi);
475
+ split_two(b, bLo, bHi);
476
+ const Packet4i sumLo = _mm_add_epi32(aLo, bLo);
477
+ const Packet4i sumHi = _mm_add_epi32(aHi, bHi);
478
+ return combine_two(sumLo, sumHi);
479
+ #endif
480
+ }
478
+
479
+ // Subtracts the 32-bit lanes of b from those of a. Without AVX2 the two 128-bit halves are handled separately.
480
+ template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b)
481
+ {
482
+ #ifdef EIGEN_VECTORIZE_AVX2
483
+ return _mm256_sub_epi32(a, b);
484
+ #else
485
+ Packet4i aLo, aHi, bLo, bHi;
486
+ split_two(a, aLo, aHi);
487
+ split_two(b, bLo, bHi);
488
+ const Packet4i diffLo = _mm_sub_epi32(aLo, bLo);
489
+ const Packet4i diffHi = _mm_sub_epi32(aHi, bHi);
490
+ return combine_two(diffLo, diffHi);
491
+ #endif
492
+ }
490
+
491
+ // Bitwise AND of two 256-bit integer packets. Without AVX2 the float-domain instruction is used on the same bits.
492
+ template<> EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(const Packet8i& a, const Packet8i& b)
493
+ {
494
+ #ifdef EIGEN_VECTORIZE_AVX2
495
+ return _mm256_and_si256(a, b);
496
+ #else
497
+ const Packet8f fa = reinterpret_to_float(a);
498
+ const Packet8f fb = reinterpret_to_float(b);
499
+ return reinterpret_to_int(_mm256_and_ps(fa, fb));
500
+ #endif
501
+ }
499
+
500
+ // Forwards (a, b) unchanged to the andnot intrinsic, which computes (~a) & b. Without AVX2 the float-domain instruction is used on the same bits.
501
+ template<> EIGEN_STRONG_INLINE Packet8i pandnot<Packet8i>(const Packet8i& a, const Packet8i& b)
502
+ {
503
+ #ifdef EIGEN_VECTORIZE_AVX2
504
+ return _mm256_andnot_si256(a, b);
505
+ #else
506
+ const Packet8f fa = reinterpret_to_float(a);
507
+ const Packet8f fb = reinterpret_to_float(b);
508
+ return reinterpret_to_int(_mm256_andnot_ps(fa, fb));
509
+ #endif
510
+ }
508
+
509
+ // Bitwise OR of two 256-bit integer packets. Without AVX2 the float-domain instruction is used on the same bits.
510
+ template<> EIGEN_STRONG_INLINE Packet8i por<Packet8i>(const Packet8i& a, const Packet8i& b)
511
+ {
512
+ #ifdef EIGEN_VECTORIZE_AVX2
513
+ return _mm256_or_si256(a, b);
514
+ #else
515
+ const Packet8f fa = reinterpret_to_float(a);
516
+ const Packet8f fb = reinterpret_to_float(b);
517
+ return reinterpret_to_int(_mm256_or_ps(fa, fb));
518
+ #endif
519
+ }
517
+
518
+ // Bitwise XOR of two 256-bit integer packets. Without AVX2 the float-domain instruction is used on the same bits.
519
+ template<> EIGEN_STRONG_INLINE Packet8i pxor<Packet8i>(const Packet8i& a, const Packet8i& b)
520
+ {
521
+ #ifdef EIGEN_VECTORIZE_AVX2
522
+ return _mm256_xor_si256(a, b);
523
+ #else
524
+ const Packet8f fa = reinterpret_to_float(a);
525
+ const Packet8f fb = reinterpret_to_float(b);
526
+ return reinterpret_to_int(_mm256_xor_ps(fa, fb));
527
+ #endif
528
+ }
526
+
527
+ template<>
528
+ EIGEN_STRONG_INLINE Packet8i pcmplt<Packet8i>(const Packet8i& a, const Packet8i& b)
529
+ {
530
+ return _mm256_cmpgt_epi32(b, a);
531
+ }
532
+
533
+ // Lane-wise a < b on eight floats (ordered, quiet compare); the result is a per-lane mask.
534
+ template<>
535
+ EIGEN_STRONG_INLINE Packet8f pcmplt<Packet8f>(const Packet8f& a, const Packet8f& b)
536
+ {
537
+ const Packet8f mask = _mm256_cmp_ps(a, b, _CMP_LT_OQ);
538
+ return mask;
539
+ }
538
+
539
+ // Lane-wise a <= b on eight floats (ordered, quiet compare); the result is a per-lane mask.
540
+ template<>
541
+ EIGEN_STRONG_INLINE Packet8f pcmple<Packet8f>(const Packet8f& a, const Packet8f& b)
542
+ {
543
+ const Packet8f mask = _mm256_cmp_ps(a, b, _CMP_LE_OQ);
544
+ return mask;
545
+ }
544
+
545
+ // Lane-wise a < b on four doubles (ordered, quiet compare); the result is a per-lane mask.
546
+ template<>
547
+ EIGEN_STRONG_INLINE Packet4d pcmplt<Packet4d>(const Packet4d& a, const Packet4d& b)
548
+ {
549
+ const Packet4d mask = _mm256_cmp_pd(a, b, _CMP_LT_OQ);
550
+ return mask;
551
+ }
550
+
551
+ // Lane-wise a <= b on four doubles (ordered, quiet compare); the result is a per-lane mask.
552
+ template<>
553
+ EIGEN_STRONG_INLINE Packet4d pcmple<Packet4d>(const Packet4d& a, const Packet4d& b)
554
+ {
555
+ const Packet4d mask = _mm256_cmp_pd(a, b, _CMP_LE_OQ);
556
+ return mask;
557
+ }
556
+
557
+ // Per-lane select: thenPacket where the sign bit of ifPacket is set, elsePacket elsewhere.
558
+ template<>
559
+ EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8f& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket)
560
+ {
561
+ const Packet8f selected = _mm256_blendv_ps(elsePacket, thenPacket, ifPacket);
562
+ return selected;
563
+ }
562
+
563
+ // Per-32-bit-lane select on integer packets, done with the float blend on the same bits:
564
+ // thenPacket where the top bit of the ifPacket lane is set, elsePacket elsewhere.
565
+ template<>
566
+ EIGEN_STRONG_INLINE Packet8i pblendv(const Packet8i& ifPacket, const Packet8i& thenPacket, const Packet8i& elsePacket)
567
+ {
568
+ const Packet8f onFalse = _mm256_castsi256_ps(elsePacket);
569
+ const Packet8f onTrue = _mm256_castsi256_ps(thenPacket);
570
+ const Packet8f selector = _mm256_castsi256_ps(ifPacket);
571
+ return _mm256_castps_si256(_mm256_blendv_ps(onFalse, onTrue, selector));
572
+ }
572
+
573
+ // Per-lane select: thenPacket where the sign bit of ifPacket is set, elsePacket elsewhere.
574
+ template<>
575
+ EIGEN_STRONG_INLINE Packet4d pblendv(const Packet4d& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket)
576
+ {
577
+ const Packet4d selected = _mm256_blendv_pd(elsePacket, thenPacket, ifPacket);
578
+ return selected;
579
+ }
578
+
579
+ // Loads addr[index[i]] for each of the eight 32-bit indices. Without AVX2 the indices are spilled to memory and the loads are done one by one.
580
+ template<>
581
+ EIGEN_STRONG_INLINE Packet8i pgather<Packet8i>(const int* addr, const Packet8i& index)
582
+ {
583
+ #ifdef EIGEN_VECTORIZE_AVX2
584
+ return _mm256_i32gather_epi32(addr, index, 4);
585
+ #else
586
+ uint32_t idx[8];
587
+ _mm256_storeu_si256(reinterpret_cast<Packet8i*>(idx), index);
588
+ return _mm256_setr_epi32(
589
+ addr[idx[0]], addr[idx[1]], addr[idx[2]], addr[idx[3]],
590
+ addr[idx[4]], addr[idx[5]], addr[idx[6]], addr[idx[7]]);
591
+ #endif
592
+ }
591
+
592
+ // Loads addr[index[i]] as floats for each of the eight 32-bit indices. Without AVX2 the indices are spilled to memory and the loads are done one by one.
593
+ template<>
594
+ EIGEN_STRONG_INLINE Packet8f pgather<Packet8i>(const float *addr, const Packet8i& index)
595
+ {
596
+ #ifdef EIGEN_VECTORIZE_AVX2
597
+ return _mm256_i32gather_ps(addr, index, 4);
598
+ #else
599
+ uint32_t idx[8];
600
+ _mm256_storeu_si256(reinterpret_cast<Packet8i*>(idx), index);
601
+ return _mm256_setr_ps(
602
+ addr[idx[0]], addr[idx[1]], addr[idx[2]], addr[idx[3]],
603
+ addr[idx[4]], addr[idx[5]], addr[idx[6]], addr[idx[7]]);
604
+ #endif
605
+ }
604
+
605
+ // Loads four doubles addr[index[i]]. A Packet8i carries eight 32-bit indices but a Packet4d only
606
+ // holds four values, so `upperhalf` selects indices 4-7 instead of 0-3.
607
+ // The AVX2 branch previously ignored `upperhalf` and always used indices 0-3, unlike the fallback branch.
608
+ template<>
609
+ EIGEN_STRONG_INLINE Packet4d pgather<Packet8i>(const double *addr, const Packet8i& index, bool upperhalf)
610
+ {
611
+ #ifdef EIGEN_VECTORIZE_AVX2
612
+ const __m128i half = upperhalf ? _mm256_extracti128_si256(index, 1) : _mm256_castsi256_si128(index);
613
+ return _mm256_i32gather_pd(addr, half, 8);
614
+ #else
615
+ uint32_t u[8];
616
+ _mm256_storeu_si256((Packet8i*)u, index);
617
+ if (upperhalf)
618
+ {
619
+ return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
620
+ }
621
+ else
622
+ {
623
+ return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
624
+ }
625
+ #endif
626
+ }
623
+
624
+ // Collects the sign bit of each of the eight float lanes into the low bits of an int.
625
+ template<>
626
+ EIGEN_STRONG_INLINE int pmovemask<Packet8f>(const Packet8f& a)
627
+ {
628
+ const int signBits = _mm256_movemask_ps(a);
629
+ return signBits;
630
+ }
629
+
630
+ // Collects the sign bit of each of the four double lanes into the low bits of an int.
631
+ template<>
632
+ EIGEN_STRONG_INLINE int pmovemask<Packet4d>(const Packet4d& a)
633
+ {
634
+ const int signBits = _mm256_movemask_pd(a);
635
+ return signBits;
636
+ }
635
+
636
+ // Collects the top bit of each 32-bit integer lane by reusing the float overload on the same bits.
637
+ template<>
638
+ EIGEN_STRONG_INLINE int pmovemask<Packet8i>(const Packet8i& a)
639
+ {
640
+ const Packet8f asFloat = _mm256_castsi256_ps(a);
641
+ return pmovemask(asFloat);
642
+ }
641
+
642
+ // Rounds each float lane toward zero without raising floating-point exceptions.
643
+ template<>
644
+ EIGEN_STRONG_INLINE Packet8f ptruncate<Packet8f>(const Packet8f& a)
645
+ {
646
+ const Packet8f truncated = _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
647
+ return truncated;
648
+ }
647
+
648
+ // Rounds each double lane toward zero without raising floating-point exceptions.
649
+ template<>
650
+ EIGEN_STRONG_INLINE Packet4d ptruncate<Packet4d>(const Packet4d& a)
651
+ {
652
+ const Packet4d truncated = _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
653
+ return truncated;
654
+ }
653
+
654
+ // 64-bit lane equality mask. Without AVX2 the two 128-bit halves are compared separately and recombined.
655
+ template<>
656
+ EIGEN_STRONG_INLINE Packet8i pcmpeq64<Packet8i>(const Packet8i& a, const Packet8i& b)
657
+ {
658
+ #ifdef EIGEN_VECTORIZE_AVX2
659
+ return _mm256_cmpeq_epi64(a, b);
660
+ #else
661
+ Packet4i aLo, aHi, bLo, bHi;
662
+ split_two(a, aLo, aHi);
663
+ split_two(b, bLo, bHi);
664
+ const Packet4i eqLo = _mm_cmpeq_epi64(aLo, bLo);
665
+ const Packet4i eqHi = _mm_cmpeq_epi64(aHi, bHi);
666
+ return combine_two(eqLo, eqHi);
667
+ #endif
668
+ }
666
+
667
+ // Computes lane * b + c on each of the four unsigned 64-bit lanes, going through a scalar buffer.
668
+ template<>
669
+ EIGEN_STRONG_INLINE Packet8i pmuluadd64<Packet8i>(const Packet8i& a, uint64_t b, uint64_t c)
670
+ {
671
+ uint64_t lanes[4];
672
+ _mm256_storeu_si256((__m256i*)lanes, a);
673
+ for (int i = 0; i < 4; ++i)
674
+ {
675
+ lanes[i] = lanes[i] * b + c;
676
+ }
677
+ return _mm256_loadu_si256((__m256i*)lanes);
678
+ }
678
+ }
679
+ }
680
+ #endif
681
+
682
+ #ifdef EIGEN_VECTORIZE_SSE2
683
+ #include <xmmintrin.h>
684
+
685
+ namespace Eigen
686
+ {
687
+ namespace internal
688
+ {
689
+ // Bit-level casts from a 128-bit integer packet; the cast intrinsics convert no lane values.
690
+ template<>
691
+ struct reinterpreter<Packet4i>
692
+ {
693
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x) { return _mm_castsi128_ps(x); }
694
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x) { return _mm_castsi128_pd(x); }
695
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x) { return x; }
696
+ };
707
+
708
+ // Bit-level casts from a 128-bit float packet; the cast intrinsics convert no lane values.
709
+ template<>
710
+ struct reinterpreter<Packet4f>
711
+ {
712
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x) { return x; }
713
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x) { return _mm_castps_pd(x); }
714
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x) { return _mm_castps_si128(x); }
715
+ };
726
+
727
+ // Bit-level casts from a 128-bit double packet; the cast intrinsics convert no lane values.
728
+ template<>
729
+ struct reinterpreter<Packet2d>
730
+ {
731
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x) { return _mm_castpd_ps(x); }
732
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x) { return x; }
733
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x) { return _mm_castpd_si128(x); }
734
+ };
745
+
746
+ // Extracts the two 64-bit halves of a 128-bit integer packet: a is the low half, b the high half.
747
+ EIGEN_STRONG_INLINE void split_two(const Packet4i& x, uint64_t& a, uint64_t& b)
748
+ {
749
+ #ifdef EIGEN_VECTORIZE_SSE4_1
750
+ a = _mm_extract_epi64(x, 0);
751
+ b = _mm_extract_epi64(x, 1);
752
+ #else
753
+ // No 64-bit extract available: spill the packet to memory and read both halves back.
754
+ uint64_t halves[2];
755
+ _mm_storeu_si128(reinterpret_cast<__m128i*>(halves), x);
756
+ a = halves[0];
757
+ b = halves[1];
758
+ #endif
759
+ }
758
+
759
+ EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet4i& a, const Packet4i& b) // returns [a0, a2, b0, b2]: the even 32-bit lanes of a followed by those of b
760
+ {
761
+ auto sa = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0)); // sa = [a0, a2, a1, a3]
762
+ auto sb = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 0, 3, 1)); // sb = [b1, b3, b0, b2]
763
+ sa = _mm_and_si128(sa, _mm_setr_epi32(-1, -1, 0, 0)); // keep lanes 0-1 of sa
764
+ sb = _mm_and_si128(sb, _mm_setr_epi32(0, 0, -1, -1)); // keep lanes 2-3 of sb
765
+ return _mm_or_si128(sa, sb);
766
+ }
767
+
768
+ // Broadcasts the 64-bit value a into both 64-bit lanes of a 128-bit integer packet.
769
+ template<>
770
+ EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
771
+ {
772
+ const Packet4i broadcast = _mm_set1_epi64x(a);
773
+ return broadcast;
774
+ }
773
+
774
+ // 32-bit lane equality: equal lanes become all ones, the others zero.
775
+ template<>
776
+ EIGEN_STRONG_INLINE Packet4i pcmpeq<Packet4i>(const Packet4i& a, const Packet4i& b)
777
+ {
778
+ const Packet4i mask = _mm_cmpeq_epi32(a, b);
779
+ return mask;
780
+ }
779
+
780
+ // Logical left shift of every 32-bit lane by b bits.
781
+ template<>
782
+ EIGEN_STRONG_INLINE Packet4i psll<Packet4i>(const Packet4i& a, int b)
783
+ {
784
+ const Packet4i shifted = _mm_slli_epi32(a, b);
785
+ return shifted;
786
+ }
785
+
786
+ // Logical right shift of every 32-bit lane by b bits.
787
+ template<>
788
+ EIGEN_STRONG_INLINE Packet4i psrl<Packet4i>(const Packet4i& a, int b)
789
+ {
790
+ const Packet4i shifted = _mm_srli_epi32(a, b);
791
+ return shifted;
792
+ }
791
+
792
+
793
+ // Logical left shift of both 64-bit lanes by b bits.
794
+ template<>
795
+ EIGEN_STRONG_INLINE Packet4i psll64<Packet4i>(const Packet4i& a, int b)
796
+ {
797
+ const Packet4i shifted = _mm_slli_epi64(a, b);
798
+ return shifted;
799
+ }
798
+
799
+ // Logical right shift of both 64-bit lanes by b bits.
800
+ template<>
801
+ EIGEN_STRONG_INLINE Packet4i psrl64<Packet4i>(const Packet4i& a, int b)
802
+ {
803
+ const Packet4i shifted = _mm_srli_epi64(a, b);
804
+ return shifted;
805
+ }
804
+
805
+ // Signed 32-bit lane-wise a < b; lanes where it holds are all ones, the others zero.
806
+ template<>
807
+ EIGEN_STRONG_INLINE Packet4i pcmplt<Packet4i>(const Packet4i& a, const Packet4i& b)
808
+ {
809
+ const Packet4i mask = _mm_cmplt_epi32(a, b);
810
+ return mask;
811
+ }
810
+
811
+ // Lane-wise a < b on four floats; the result is a per-lane mask.
812
+ template<>
813
+ EIGEN_STRONG_INLINE Packet4f pcmplt<Packet4f>(const Packet4f& a, const Packet4f& b)
814
+ {
815
+ const Packet4f mask = _mm_cmplt_ps(a, b);
816
+ return mask;
817
+ }
816
+
817
+ // Lane-wise a <= b on four floats; the result is a per-lane mask.
818
+ template<>
819
+ EIGEN_STRONG_INLINE Packet4f pcmple<Packet4f>(const Packet4f& a, const Packet4f& b)
820
+ {
821
+ const Packet4f mask = _mm_cmple_ps(a, b);
822
+ return mask;
823
+ }
822
+
823
+ // Lane-wise a < b on two doubles; the result is a per-lane mask.
824
+ template<>
825
+ EIGEN_STRONG_INLINE Packet2d pcmplt<Packet2d>(const Packet2d& a, const Packet2d& b)
826
+ {
827
+ const Packet2d mask = _mm_cmplt_pd(a, b);
828
+ return mask;
829
+ }
828
+
829
+ // Lane-wise a <= b on two doubles; the result is a per-lane mask.
830
+ template<>
831
+ EIGEN_STRONG_INLINE Packet2d pcmple<Packet2d>(const Packet2d& a, const Packet2d& b)
832
+ {
833
+ const Packet2d mask = _mm_cmple_pd(a, b);
834
+ return mask;
835
+ }
834
+
835
+ // Per-lane select between thenPacket and elsePacket, driven by ifPacket.
836
+ // SSE4.1 blends on the sign bit of each lane; the fallback masks with every bit of ifPacket.
837
+ template<>
838
+ EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
839
+ {
840
+ #ifdef EIGEN_VECTORIZE_SSE4_1
841
+ return _mm_blendv_ps(elsePacket, thenPacket, ifPacket);
842
+ #else
843
+ const Packet4f taken = _mm_and_ps(ifPacket, thenPacket);
844
+ const Packet4f kept = _mm_andnot_ps(ifPacket, elsePacket);
845
+ return _mm_or_ps(taken, kept);
846
+ #endif
847
+ }
844
+
845
+ // Per-32-bit-lane select between thenPacket and elsePacket, driven by ifPacket.
846
+ // SSE4.1 uses the float blend on the same bits (top bit of each lane); the fallback masks with every bit of ifPacket.
847
+ template<>
848
+ EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
849
+ {
850
+ #ifdef EIGEN_VECTORIZE_SSE4_1
851
+ const Packet4f onFalse = _mm_castsi128_ps(elsePacket);
852
+ const Packet4f onTrue = _mm_castsi128_ps(thenPacket);
853
+ const Packet4f selector = _mm_castsi128_ps(ifPacket);
854
+ return _mm_castps_si128(_mm_blendv_ps(onFalse, onTrue, selector));
855
+ #else
856
+ const Packet4i taken = _mm_and_si128(ifPacket, thenPacket);
857
+ const Packet4i kept = _mm_andnot_si128(ifPacket, elsePacket);
858
+ return _mm_or_si128(taken, kept);
859
+ #endif
860
+ }
854
+
855
+ // Per-lane select between thenPacket and elsePacket, driven by ifPacket.
856
+ // SSE4.1 blends on the sign bit of each lane; the fallback masks with every bit of ifPacket.
857
+ template<>
858
+ EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
859
+ {
860
+ #ifdef EIGEN_VECTORIZE_SSE4_1
861
+ return _mm_blendv_pd(elsePacket, thenPacket, ifPacket);
862
+ #else
863
+ const Packet2d taken = _mm_and_pd(ifPacket, thenPacket);
864
+ const Packet2d kept = _mm_andnot_pd(ifPacket, elsePacket);
865
+ return _mm_or_pd(taken, kept);
866
+ #endif
867
+ }
864
+
865
+ // Loads addr[index[i]] for each of the four 32-bit indices. Without AVX2 the indices are spilled to memory and the loads are done one by one.
866
+ template<>
867
+ EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
868
+ {
869
+ #ifdef EIGEN_VECTORIZE_AVX2
870
+ return _mm_i32gather_epi32(addr, index, 4);
871
+ #else
872
+ uint32_t idx[4];
873
+ _mm_storeu_si128(reinterpret_cast<Packet4i*>(idx), index);
874
+ return _mm_setr_epi32(addr[idx[0]], addr[idx[1]], addr[idx[2]], addr[idx[3]]);
875
+ #endif
876
+ }
876
+
877
+ // Loads addr[index[i]] as floats for each of the four 32-bit indices. Without AVX2 the indices are spilled to memory and the loads are done one by one.
878
+ template<>
879
+ EIGEN_STRONG_INLINE Packet4f pgather<Packet4i>(const float* addr, const Packet4i& index)
880
+ {
881
+ #ifdef EIGEN_VECTORIZE_AVX2
882
+ return _mm_i32gather_ps(addr, index, 4);
883
+ #else
884
+ uint32_t idx[4];
885
+ _mm_storeu_si128(reinterpret_cast<Packet4i*>(idx), index);
886
+ return _mm_setr_ps(addr[idx[0]], addr[idx[1]], addr[idx[2]], addr[idx[3]]);
887
+ #endif
888
+ }
888
+
889
+ // Loads two doubles addr[index[i]]. A Packet4i carries four 32-bit indices but a Packet2d only
890
+ // holds two values, so `upperhalf` selects indices 2-3 instead of 0-1.
891
+ // The AVX2 branch previously ignored `upperhalf` and always used indices 0-1, unlike the fallback branch.
892
+ template<>
893
+ EIGEN_STRONG_INLINE Packet2d pgather<Packet4i>(const double* addr, const Packet4i& index, bool upperhalf)
894
+ {
895
+ #ifdef EIGEN_VECTORIZE_AVX2
896
+ // _mm_i32gather_pd reads the two lowest 32-bit indices, so move lanes 2-3 down when the upper half is requested.
897
+ const Packet4i half = upperhalf ? _mm_unpackhi_epi64(index, index) : index;
898
+ return _mm_i32gather_pd(addr, half, 8);
899
+ #else
900
+ uint32_t u[4];
901
+ _mm_storeu_si128((Packet4i*)u, index);
902
+ if (upperhalf)
903
+ {
904
+ return _mm_setr_pd(addr[u[2]], addr[u[3]]);
905
+ }
906
+ else
907
+ {
908
+ return _mm_setr_pd(addr[u[0]], addr[u[1]]);
909
+ }
910
+ #endif
911
+ }
907
+
908
+ // Collects the sign bit of each of the four float lanes into the low bits of an int.
909
+ template<>
910
+ EIGEN_STRONG_INLINE int pmovemask<Packet4f>(const Packet4f& a)
911
+ {
912
+ const int signBits = _mm_movemask_ps(a);
913
+ return signBits;
914
+ }
913
+
914
+ // Collects the sign bit of each of the two double lanes into the low bits of an int.
915
+ template<>
916
+ EIGEN_STRONG_INLINE int pmovemask<Packet2d>(const Packet2d& a)
917
+ {
918
+ const int signBits = _mm_movemask_pd(a);
919
+ return signBits;
920
+ }
919
+
920
+ // Collects the top bit of each 32-bit integer lane by reusing the float overload on the same bits.
921
+ template<>
922
+ EIGEN_STRONG_INLINE int pmovemask<Packet4i>(const Packet4i& a)
923
+ {
924
+ const Packet4f asFloat = _mm_castsi128_ps(a);
925
+ return pmovemask(asFloat);
926
+ }
925
+
926
+ // Rounds each float lane toward zero.
927
+ template<>
928
+ EIGEN_STRONG_INLINE Packet4f ptruncate<Packet4f>(const Packet4f& a)
929
+ {
930
+ #ifdef EIGEN_VECTORIZE_SSE4_1
931
+ return _mm_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
932
+ #else
933
+ // _mm_cvttps_epi32 always truncates, so the MXCSR rounding mode no longer has to be
934
+ // switched and restored around the conversion. As before, the round trip goes through
935
+ // int32, so lanes outside the int32 range are not preserved.
936
+ return _mm_cvtepi32_ps(_mm_cvttps_epi32(a));
937
+ #endif
938
+ }
939
+
940
+ // Rounds each double lane toward zero.
941
+ template<>
942
+ EIGEN_STRONG_INLINE Packet2d ptruncate<Packet2d>(const Packet2d& a)
943
+ {
944
+ #ifdef EIGEN_VECTORIZE_SSE4_1
945
+ return _mm_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
946
+ #else
947
+ // _mm_cvttpd_epi32 always truncates, so the MXCSR rounding mode no longer has to be
948
+ // switched and restored around the conversion. As before, the round trip goes through
949
+ // int32, so lanes outside the int32 range are not preserved.
950
+ return _mm_cvtepi32_pd(_mm_cvttpd_epi32(a));
951
+ #endif
952
+ }
953
+
954
+ // 64-bit lane equality mask. Without SSE4.1 a 64-bit lane counts as equal only when both of its 32-bit halves are equal.
955
+ template<>
956
+ EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(const Packet4i& a, const Packet4i& b)
957
+ {
958
+ #ifdef EIGEN_VECTORIZE_SSE4_1
959
+ return _mm_cmpeq_epi64(a, b);
960
+ #else
961
+ const Packet4i eq32 = _mm_cmpeq_epi32(a, b);
962
+ // Swap the two 32-bit results inside each 64-bit lane, then AND them together.
963
+ const Packet4i swapped = _mm_shuffle_epi32(eq32, _MM_SHUFFLE(2, 3, 0, 1));
964
+ return pand(eq32, swapped);
965
+ #endif
966
+ }
964
+
965
+ // Computes lane * b + c on both unsigned 64-bit lanes, going through a scalar buffer.
966
+ template<>
967
+ EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(const Packet4i& a, uint64_t b, uint64_t c)
968
+ {
969
+ uint64_t lanes[2];
970
+ _mm_storeu_si128((__m128i*)lanes, a);
971
+ for (int i = 0; i < 2; ++i)
972
+ {
973
+ lanes[i] = lanes[i] * b + c;
974
+ }
975
+ return _mm_loadu_si128((__m128i*)lanes);
976
+ }
974
+ }
975
+ }
976
+ #endif
977
+
978
+ #endif