tomoto 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (420) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +123 -0
  5. data/ext/tomoto/ext.cpp +245 -0
  6. data/ext/tomoto/extconf.rb +28 -0
  7. data/lib/tomoto.rb +12 -0
  8. data/lib/tomoto/ct.rb +11 -0
  9. data/lib/tomoto/hdp.rb +11 -0
  10. data/lib/tomoto/lda.rb +67 -0
  11. data/lib/tomoto/version.rb +3 -0
  12. data/vendor/EigenRand/EigenRand/Core.h +1139 -0
  13. data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
  14. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
  15. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
  16. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
  17. data/vendor/EigenRand/EigenRand/EigenRand +19 -0
  18. data/vendor/EigenRand/EigenRand/Macro.h +24 -0
  19. data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
  20. data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
  21. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
  22. data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
  23. data/vendor/EigenRand/EigenRand/doc.h +220 -0
  24. data/vendor/EigenRand/LICENSE +21 -0
  25. data/vendor/EigenRand/README.md +288 -0
  26. data/vendor/eigen/COPYING.BSD +26 -0
  27. data/vendor/eigen/COPYING.GPL +674 -0
  28. data/vendor/eigen/COPYING.LGPL +502 -0
  29. data/vendor/eigen/COPYING.MINPACK +52 -0
  30. data/vendor/eigen/COPYING.MPL2 +373 -0
  31. data/vendor/eigen/COPYING.README +18 -0
  32. data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
  33. data/vendor/eigen/Eigen/Cholesky +46 -0
  34. data/vendor/eigen/Eigen/CholmodSupport +48 -0
  35. data/vendor/eigen/Eigen/Core +537 -0
  36. data/vendor/eigen/Eigen/Dense +7 -0
  37. data/vendor/eigen/Eigen/Eigen +2 -0
  38. data/vendor/eigen/Eigen/Eigenvalues +61 -0
  39. data/vendor/eigen/Eigen/Geometry +62 -0
  40. data/vendor/eigen/Eigen/Householder +30 -0
  41. data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
  42. data/vendor/eigen/Eigen/Jacobi +33 -0
  43. data/vendor/eigen/Eigen/LU +50 -0
  44. data/vendor/eigen/Eigen/MetisSupport +35 -0
  45. data/vendor/eigen/Eigen/OrderingMethods +73 -0
  46. data/vendor/eigen/Eigen/PaStiXSupport +48 -0
  47. data/vendor/eigen/Eigen/PardisoSupport +35 -0
  48. data/vendor/eigen/Eigen/QR +51 -0
  49. data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
  50. data/vendor/eigen/Eigen/SPQRSupport +34 -0
  51. data/vendor/eigen/Eigen/SVD +51 -0
  52. data/vendor/eigen/Eigen/Sparse +36 -0
  53. data/vendor/eigen/Eigen/SparseCholesky +45 -0
  54. data/vendor/eigen/Eigen/SparseCore +69 -0
  55. data/vendor/eigen/Eigen/SparseLU +46 -0
  56. data/vendor/eigen/Eigen/SparseQR +37 -0
  57. data/vendor/eigen/Eigen/StdDeque +27 -0
  58. data/vendor/eigen/Eigen/StdList +26 -0
  59. data/vendor/eigen/Eigen/StdVector +27 -0
  60. data/vendor/eigen/Eigen/SuperLUSupport +64 -0
  61. data/vendor/eigen/Eigen/UmfPackSupport +40 -0
  62. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
  63. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
  64. data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  65. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
  66. data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
  67. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
  68. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
  69. data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
  70. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
  71. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
  72. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
  73. data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
  74. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
  75. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
  76. data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
  77. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
  78. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
  79. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
  80. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
  81. data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  82. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
  84. data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
  85. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
  86. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
  87. data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
  88. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
  89. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
  90. data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
  91. data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
  92. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
  93. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
  94. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
  95. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
  96. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
  97. data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
  98. data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
  99. data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
  100. data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
  101. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
  102. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
  103. data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
  104. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
  105. data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
  106. data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
  107. data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
  108. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
  109. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
  110. data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
  111. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
  112. data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
  113. data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
  114. data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
  115. data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
  116. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
  117. data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
  118. data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
  119. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
  120. data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  121. data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
  122. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
  123. data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
  124. data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
  125. data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
  126. data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
  127. data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
  128. data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
  129. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
  130. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
  131. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
  132. data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
  133. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
  134. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
  135. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
  136. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
  137. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
  138. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
  139. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
  140. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
  141. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
  142. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
  143. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
  145. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
  146. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
  148. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
  149. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
  153. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
  154. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
  156. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
  160. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
  161. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
  162. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
  163. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
  164. data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  165. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
  166. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
  167. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
  168. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
  169. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  170. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
  171. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
  172. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  173. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
  174. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
  175. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
  176. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
  177. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  178. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  179. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
  180. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
  181. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
  182. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  183. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  184. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
  185. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
  186. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
  187. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
  188. data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
  189. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
  190. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
  191. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
  192. data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
  193. data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
  194. data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
  195. data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
  196. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
  197. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
  198. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
  199. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  200. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
  201. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  202. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  203. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  204. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  205. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  206. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  207. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
  208. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
  209. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  210. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
  211. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  212. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
  213. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
  214. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
  215. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
  216. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
  217. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
  218. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
  219. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
  220. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
  221. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
  222. data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
  223. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
  224. data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
  225. data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
  226. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
  227. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
  228. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
  229. data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
  230. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
  231. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  232. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
  233. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
  234. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
  235. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
  236. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
  237. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
  238. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
  239. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
  240. data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
  241. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
  242. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
  243. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
  244. data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  245. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
  246. data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  247. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
  248. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
  249. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
  250. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  251. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
  252. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
  253. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  254. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
  255. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
  256. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
  257. data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  258. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
  259. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
  260. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
  261. data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  262. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
  263. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  264. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
  265. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
  266. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
  267. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
  268. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  269. data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  270. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
  271. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
  272. data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  273. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
  274. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
  275. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
  276. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
  277. data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  278. data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
  279. data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  280. data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
  281. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
  282. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
  283. data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  284. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
  285. data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
  295. data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  296. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
  297. data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  298. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  299. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  300. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  307. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  308. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  309. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  310. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  311. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  312. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  313. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
  314. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
  315. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
  316. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
  317. data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
  318. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
  319. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
  320. data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
  321. data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
  322. data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
  323. data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
  324. data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
  325. data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
  326. data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
  327. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
  328. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
  329. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
  330. data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  331. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
  332. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  333. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
  334. data/vendor/eigen/README.md +3 -0
  335. data/vendor/eigen/bench/README.txt +55 -0
  336. data/vendor/eigen/bench/btl/COPYING +340 -0
  337. data/vendor/eigen/bench/btl/README +154 -0
  338. data/vendor/eigen/bench/tensors/README +21 -0
  339. data/vendor/eigen/blas/README.txt +6 -0
  340. data/vendor/eigen/demos/mandelbrot/README +10 -0
  341. data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
  342. data/vendor/eigen/demos/opengl/README +13 -0
  343. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
  344. data/vendor/eigen/unsupported/README.txt +50 -0
  345. data/vendor/tomotopy/LICENSE +21 -0
  346. data/vendor/tomotopy/README.kr.rst +375 -0
  347. data/vendor/tomotopy/README.rst +382 -0
  348. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
  349. data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
  350. data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
  351. data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
  352. data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
  353. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
  354. data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
  355. data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
  356. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
  357. data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
  358. data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
  359. data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
  360. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
  361. data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
  362. data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
  363. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
  364. data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
  365. data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
  366. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
  367. data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
  368. data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
  369. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
  370. data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
  371. data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
  372. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
  373. data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
  374. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
  375. data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
  376. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
  377. data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
  378. data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
  379. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
  380. data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
  381. data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
  382. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
  383. data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
  384. data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
  385. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
  386. data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
  387. data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
  388. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
  389. data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
  390. data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
  391. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
  392. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
  393. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
  394. data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
  395. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
  396. data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
  397. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
  398. data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
  399. data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
  400. data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
  401. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
  402. data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
  403. data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
  404. data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
  405. data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
  406. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
  407. data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
  408. data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
  409. data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
  410. data/vendor/tomotopy/src/Utils/exception.h +28 -0
  411. data/vendor/tomotopy/src/Utils/math.h +281 -0
  412. data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
  413. data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
  414. data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
  415. data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
  416. data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
  417. data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
  418. data/vendor/tomotopy/src/Utils/text.hpp +49 -0
  419. data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
  420. metadata +531 -0
@@ -0,0 +1,19 @@
1
+ /**
2
+ * @file EigenRand
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief Umbrella header: includes Eigen/Dense, EigenRand/Macro.h and EigenRand/Core.h
5
+ * @version 0.2.0
6
+ * @date 2020-06-22
7
+ *
8
+ * @copyright Copyright (c) 2020
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_EIGENRAND_H
13
+ #define EIGENRAND_EIGENRAND_H
14
+
15
+ #include <Eigen/Dense>
16
+ #include <EigenRand/Macro.h>
17
+ #include <EigenRand/Core.h>
18
+
19
+ #endif
@@ -0,0 +1,24 @@
1
+ /**
2
+ * @file Macro.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief Version-number macros for EigenRand and a compile-time check that Eigen is at least 3.3.7
5
+ * @version 0.2.1
6
+ * @date 2020-07-11
7
+ *
8
+ * @copyright Copyright (c) 2020
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MACRO_H // include guard for this header
13
+ #define EIGENRAND_MACRO_H
14
+
15
+ #define EIGENRAND_WORLD_VERSION 0 // version triple, spelled WORLD.MAJOR.MINOR; as written it reads 0.2.0
16
+ #define EIGENRAND_MAJOR_VERSION 2
17
+ #define EIGENRAND_MINOR_VERSION 0 // NOTE(review): the file header says @version 0.2.1 but this triple reads 0.2.0 - confirm which is intended
18
+
19
+ #if EIGEN_VERSION_AT_LEAST(3,3,7) // EIGEN_VERSION_AT_LEAST is neither defined nor included by this header; it must already be in scope when this line is preprocessed
20
+ #else
21
+ #error At least Eigen 3.3.7 is required.
22
+ #endif // end of the Eigen version check
23
+
24
+ #endif // EIGENRAND_MACRO_H
@@ -0,0 +1,978 @@
1
+ /**
2
+ * @file MorePacketMath.h
3
+ * @author bab2min (bab2min@gmail.com)
4
+ * @brief
5
+ * @version 0.2.0
6
+ * @date 2020-06-22
7
+ *
8
+ * @copyright Copyright (c) 2020
9
+ *
10
+ */
11
+
12
+ #ifndef EIGENRAND_MORE_PACKET_MATH_H
13
+ #define EIGENRAND_MORE_PACKET_MATH_H
14
+
15
+ #include <Eigen/Dense>
16
+
17
+ namespace Eigen
18
+ {
19
+ namespace internal
20
+ {
21
/**
 * Customisation point for bit-level reinterpretation between packet types.
 * The primary template has no members; the per-architecture specializations
 * further down provide to_float(), to_double() and to_int().
 */
template<typename Packet>
struct reinterpreter
{
};

/** Forwards to reinterpreter<Packet>::to_float and returns whatever type it yields. */
template<typename Packet>
inline auto reinterpret_to_float(const Packet& x)
    -> decltype(reinterpreter<Packet>().to_float(x))
{
    reinterpreter<Packet> caster;
    return caster.to_float(x);
}

/** Forwards to reinterpreter<Packet>::to_double and returns whatever type it yields. */
template<typename Packet>
inline auto reinterpret_to_double(const Packet& x)
    -> decltype(reinterpreter<Packet>().to_double(x))
{
    reinterpreter<Packet> caster;
    return caster.to_double(x);
}

/** Forwards to reinterpreter<Packet>::to_int and returns whatever type it yields. */
template<typename Packet>
inline auto reinterpret_to_int(const Packet& x)
    -> decltype(reinterpreter<Packet>().to_int(x))
{
    reinterpreter<Packet> caster;
    return caster.to_int(x);
}
46
+
47
+ template<typename Packet>
48
+ EIGEN_STRONG_INLINE Packet pseti64(uint64_t a);
49
+
50
+ template<typename Packet>
51
+ EIGEN_STRONG_INLINE Packet pcmpeq(const Packet& a, const Packet& b);
52
+
53
+ template<typename Packet>
54
+ EIGEN_STRONG_INLINE Packet psll(const Packet& a, int b);
55
+
56
+ template<typename Packet>
57
+ EIGEN_STRONG_INLINE Packet psrl(const Packet& a, int b);
58
+
59
+ template<typename Packet>
60
+ EIGEN_STRONG_INLINE Packet psll64(const Packet& a, int b);
61
+
62
+ template<typename Packet>
63
+ EIGEN_STRONG_INLINE Packet psrl64(const Packet& a, int b);
64
+
65
+ template<typename Packet>
66
+ EIGEN_STRONG_INLINE int pmovemask(const Packet& a);
67
+
68
+ template<>
69
+ EIGEN_STRONG_INLINE uint64_t psll64<uint64_t>(const uint64_t& a, int b)
70
+ {
71
+ return a << b;
72
+ }
73
+
74
+ template<>
75
+ EIGEN_STRONG_INLINE uint64_t psrl64<uint64_t>(const uint64_t& a, int b)
76
+ {
77
+ return a >> b;
78
+ }
79
+
80
+ template<typename Packet>
81
+ EIGEN_STRONG_INLINE void psincos(Packet x, Packet &s, Packet &c)
82
+ {
83
+ Packet xmm1, xmm2, xmm3 = pset1<Packet>(0), sign_bit_sin, y;
84
+ using IntPacket = decltype(reinterpret_to_int(x));
85
+ IntPacket emm0, emm2, emm4;
86
+
87
+ sign_bit_sin = x;
88
+ /* take the absolute value */
89
+ x = pabs(x);
90
+ /* extract the sign bit (upper one) */
91
+ sign_bit_sin = reinterpret_to_float(
92
+ pand(reinterpret_to_int(sign_bit_sin), pset1<IntPacket>(0x80000000))
93
+ );
94
+
95
+ /* scale by 4/Pi */
96
+ y = pmul(x, pset1<Packet>(1.27323954473516));
97
+
98
+ /* store the integer part of y in emm2 */
99
+ emm2 = pcast<Packet, IntPacket>(y);
100
+
101
+ /* j=(j+1) & (~1) (see the cephes sources) */
102
+ emm2 = padd(emm2, pset1<IntPacket>(1));
103
+ emm2 = pand(emm2, pset1<IntPacket>(~1));
104
+ y = pcast<IntPacket, Packet>(emm2);
105
+
106
+ emm4 = emm2;
107
+
108
+ /* get the swap sign flag for the sine */
109
+ emm0 = pand(emm2, pset1<IntPacket>(4));
110
+ emm0 = psll(emm0, 29);
111
+ Packet swap_sign_bit_sin = reinterpret_to_float(emm0);
112
+
113
+ /* get the polynom selection mask for the sine*/
114
+ emm2 = pand(emm2, pset1<IntPacket>(2));
115
+
116
+ emm2 = pcmpeq(emm2, pset1<IntPacket>(0));
117
+ Packet poly_mask = reinterpret_to_float(emm2);
118
+
119
+ /* The magic pass: "Extended precision modular arithmetic"
120
+ x = ((x - y * DP1) - y * DP2) - y * DP3; */
121
+ xmm1 = pset1<Packet>(-0.78515625);
122
+ xmm2 = pset1<Packet>(-2.4187564849853515625e-4);
123
+ xmm3 = pset1<Packet>(-3.77489497744594108e-8);
124
+ xmm1 = pmul(y, xmm1);
125
+ xmm2 = pmul(y, xmm2);
126
+ xmm3 = pmul(y, xmm3);
127
+ x = padd(x, xmm1);
128
+ x = padd(x, xmm2);
129
+ x = padd(x, xmm3);
130
+
131
+ emm4 = psub(emm4, pset1<IntPacket>(2));
132
+ emm4 = pandnot(emm4, pset1<IntPacket>(4));
133
+ emm4 = psll(emm4, 29);
134
+ Packet sign_bit_cos = reinterpret_to_float(emm4);
135
+ sign_bit_sin = pxor(sign_bit_sin, swap_sign_bit_sin);
136
+
137
+
138
+ /* Evaluate the first polynom (0 <= x <= Pi/4) */
139
+ Packet z = pmul(x, x);
140
+ y = pset1<Packet>(2.443315711809948E-005);
141
+
142
+ y = pmul(y, z);
143
+ y = padd(y, pset1<Packet>(-1.388731625493765E-003));
144
+ y = pmul(y, z);
145
+ y = padd(y, pset1<Packet>(4.166664568298827E-002));
146
+ y = pmul(y, z);
147
+ y = pmul(y, z);
148
+ Packet tmp = pmul(z, pset1<Packet>(0.5));
149
+ y = psub(y, tmp);
150
+ y = padd(y, pset1<Packet>(1));
151
+
152
+ /* Evaluate the second polynom (Pi/4 <= x <= 0) */
153
+
154
+ Packet y2 = pset1<Packet>(-1.9515295891E-4);
155
+ y2 = pmul(y2, z);
156
+ y2 = padd(y2, pset1<Packet>(8.3321608736E-3));
157
+ y2 = pmul(y2, z);
158
+ y2 = padd(y2, pset1<Packet>(-1.6666654611E-1));
159
+ y2 = pmul(y2, z);
160
+ y2 = pmul(y2, x);
161
+ y2 = padd(y2, x);
162
+
163
+ /* select the correct result from the two polynoms */
164
+ xmm3 = poly_mask;
165
+ Packet ysin2 = pand(xmm3, y2);
166
+ Packet ysin1 = pandnot(xmm3, y);
167
+ y2 = psub(y2, ysin2);
168
+ y = psub(y, ysin1);
169
+
170
+ xmm1 = padd(ysin1, ysin2);
171
+ xmm2 = padd(y, y2);
172
+
173
+ /* update the sign */
174
+ s = pxor(xmm1, sign_bit_sin);
175
+ c = pxor(xmm2, sign_bit_cos);
176
+ }
177
+
178
+ // approximation : lgamma(z) ~= (z+2.5)ln(z+3) - z - 3 + 0.5 ln (2pi) + 1/12/(z + 3) - ln (z(z+1)(z+2))
179
+ template<typename Packet>
180
+ EIGEN_STRONG_INLINE Packet plgamma(const Packet& x)
181
+ {
182
+ auto x_3 = padd(x, pset1<Packet>(3));
183
+ auto ret = pmul(padd(x_3, pset1<Packet>(-0.5)), plog(x_3));
184
+ ret = psub(ret, x_3);
185
+ ret = padd(ret, pset1<Packet>(0.9189385332046727));
186
+ ret = padd(ret, pdiv(pset1<Packet>(1 / 12.), x_3));
187
+ ret = psub(ret, plog(pmul(
188
+ pmul(psub(x_3, pset1<Packet>(1)), psub(x_3, pset1<Packet>(2))), x)));
189
+ return ret;
190
+ }
191
+
192
+ template<typename Packet>
193
+ EIGEN_STRONG_INLINE Packet pcmplt(const Packet& a, const Packet& b);
194
+
195
+ template<typename Packet>
196
+ EIGEN_STRONG_INLINE Packet pcmple(const Packet& a, const Packet& b);
197
+
198
+ template<typename Packet>
199
+ EIGEN_STRONG_INLINE Packet pblendv(const Packet& ifPacket, const Packet& thenPacket, const Packet& elsePacket);
200
+
201
+ template<typename Packet>
202
+ EIGEN_STRONG_INLINE Packet pgather(const int* addr, const Packet& index);
203
+
204
+ template<typename Packet>
205
+ EIGEN_STRONG_INLINE auto pgather(const float* addr, const Packet& index) -> decltype(reinterpret_to_float(std::declval<Packet>()));
206
+
207
+ template<typename Packet>
208
+ EIGEN_STRONG_INLINE auto pgather(const double* addr, const Packet& index, bool upperhalf = false) -> decltype(reinterpret_to_double(std::declval<Packet>()));
209
+
210
+ template<typename Packet>
211
+ EIGEN_STRONG_INLINE Packet ptruncate(const Packet& a);
212
+
213
+ template<typename Packet>
214
+ EIGEN_STRONG_INLINE Packet pcmpeq64(const Packet& a, const Packet& b);
215
+
216
+ template<typename Packet>
217
+ EIGEN_STRONG_INLINE Packet pmuluadd64(const Packet& a, uint64_t b, uint64_t c);
218
+
219
+ template<typename IntPacket>
220
+ EIGEN_STRONG_INLINE auto bit_to_ur_float(const IntPacket& x) -> decltype(reinterpret_to_float(x))
221
+ {
222
+ using FloatPacket = decltype(reinterpret_to_float(x));
223
+
224
+ const IntPacket lower = pset1<IntPacket>(0x7FFFFF),
225
+ upper = pset1<IntPacket>(127 << 23);
226
+ const FloatPacket one = pset1<FloatPacket>(1);
227
+
228
+ return psub(reinterpret_to_float(por(pand(x, lower), upper)), one);
229
+ }
230
+
231
+ template<typename IntPacket>
232
+ EIGEN_STRONG_INLINE auto bit_to_ur_double(const IntPacket& x) -> decltype(reinterpret_to_double(x))
233
+ {
234
+ using DoublePacket = decltype(reinterpret_to_double(x));
235
+
236
+ const IntPacket lower = pseti64<IntPacket>(0xFFFFFFFFFFFFFull),
237
+ upper = pseti64<IntPacket>(1023ull << 52);
238
+ const DoublePacket one = pset1<DoublePacket>(1);
239
+
240
+ return psub(reinterpret_to_double(por(pand(x, lower), upper)), one);
241
+ }
242
+
243
/** Scalar bits-to-real conversions; only the float and double specializations exist. */
template<typename Scalar>
struct bit_scalar;

template<>
struct bit_scalar<float>
{
    /**
     * Builds a float from the low 23 bits of x with the exponent field forced
     * to 127 (a value in [1, 2)) and returns it minus 1.
     */
    float to_ur(uint32_t x)
    {
        union
        {
            uint32_t bits;
            float value;
        } pun;
        pun.bits = (x & 0x7FFFFF) | (127 << 23);
        return pun.value - 1.f;
    }

    /** Same as to_ur, offset by epsilon / 8 so that the result is never zero. */
    float to_nzur(uint32_t x)
    {
        const float offset = std::numeric_limits<float>::epsilon() / 8;
        return to_ur(x) + offset;
    }
};
265
+
266
+ template<>
267
+ struct bit_scalar<double>
268
+ {
269
+ double to_ur(uint64_t x)
270
+ {
271
+ union
272
+ {
273
+ uint64_t u;
274
+ double f;
275
+ };
276
+ u = (x & 0xFFFFFFFFFFFFFull) | (1023ull << 52);
277
+ return f - 1.;
278
+ }
279
+
280
+ double to_nzur(uint64_t x)
281
+ {
282
+ return to_ur(x) + std::numeric_limits<double>::epsilon() / 8;
283
+ }
284
+ };
285
+
286
+
287
+ struct float2
288
+ {
289
+ float f[2];
290
+ };
291
+
292
+ EIGEN_STRONG_INLINE float2 bit_to_ur_float(uint64_t x)
293
+ {
294
+ bit_scalar<float> bs;
295
+ float2 ret;
296
+ ret.f[0] = bs.to_ur(x & 0xFFFFFFFF);
297
+ ret.f[1] = bs.to_ur(x >> 32);
298
+ return ret;
299
+ }
300
+ }
301
+ }
302
+
303
+ #ifdef EIGEN_VECTORIZE_AVX
304
+ #include <immintrin.h>
305
+
306
+ namespace Eigen
307
+ {
308
+ namespace internal
309
+ {
310
+ template<>
311
+ struct reinterpreter<Packet8i>
312
+ {
313
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet8i& x)
314
+ {
315
+ return _mm256_castsi256_ps(x);
316
+ }
317
+
318
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet8i& x)
319
+ {
320
+ return _mm256_castsi256_pd(x);
321
+ }
322
+
323
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet8i& x)
324
+ {
325
+ return x;
326
+ }
327
+ };
328
+
329
+ template<>
330
+ struct reinterpreter<Packet8f>
331
+ {
332
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet8f& x)
333
+ {
334
+ return x;
335
+ }
336
+
337
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet8f& x)
338
+ {
339
+ return _mm256_castps_pd(x);
340
+ }
341
+
342
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet8f& x)
343
+ {
344
+ return _mm256_castps_si256(x);
345
+ }
346
+ };
347
+
348
+ template<>
349
+ struct reinterpreter<Packet4d>
350
+ {
351
+ EIGEN_STRONG_INLINE Packet8f to_float(const Packet4d& x)
352
+ {
353
+ return _mm256_castpd_ps(x);
354
+ }
355
+
356
+ EIGEN_STRONG_INLINE Packet4d to_double(const Packet4d& x)
357
+ {
358
+ return x;
359
+ }
360
+
361
+ EIGEN_STRONG_INLINE Packet8i to_int(const Packet4d& x)
362
+ {
363
+ return _mm256_castpd_si256(x);
364
+ }
365
+ };
366
+
367
+ EIGEN_STRONG_INLINE void split_two(const Packet8i& x, Packet4i& a, Packet4i& b)
368
+ {
369
+ a = _mm256_extractf128_si256(x, 0);
370
+ b = _mm256_extractf128_si256(x, 1);
371
+ }
372
+
373
+ EIGEN_STRONG_INLINE Packet8i combine_two(const Packet4i& a, const Packet4i& b)
374
+ {
375
+ return _mm256_insertf128_si256(_mm256_castsi128_si256(a), b, 1);
376
+ }
377
+
378
+ EIGEN_STRONG_INLINE void split_two(const Packet8f& x, Packet4f& a, Packet4f& b)
379
+ {
380
+ a = _mm256_extractf128_ps(x, 0);
381
+ b = _mm256_extractf128_ps(x, 1);
382
+ }
383
+
384
+ EIGEN_STRONG_INLINE Packet8f combine_two(const Packet4f& a, const Packet4f& b)
385
+ {
386
+ return _mm256_insertf128_ps(_mm256_castps128_ps256(a), b, 1);
387
+ }
388
+
389
+
390
+ EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet8i& a)
391
+ {
392
+ #ifdef EIGEN_VECTORIZE_AVX2
393
+ return _mm256_castsi256_si128(_mm256_permutevar8x32_epi32(a, _mm256_setr_epi32(0, 2, 4, 6, 1, 3, 5, 7)));
394
+ #else
395
+ auto sc = _mm256_permutevar_ps(_mm256_castsi256_ps(a), _mm256_setr_epi32(0, 2, 1, 3, 1, 3, 0, 2));
396
+ return _mm_castps_si128(_mm_blend_ps(_mm256_extractf128_ps(sc, 0), _mm256_extractf128_ps(sc, 1), 0b1100));
397
+ #endif
398
+ }
399
+
400
+ template<>
401
+ EIGEN_STRONG_INLINE Packet8i pseti64<Packet8i>(uint64_t a)
402
+ {
403
+ return _mm256_set1_epi64x(a);
404
+ }
405
+
406
+ template<>
407
+ EIGEN_STRONG_INLINE Packet8i pcmpeq<Packet8i>(const Packet8i& a, const Packet8i& b)
408
+ {
409
+ #ifdef EIGEN_VECTORIZE_AVX2
410
+ return _mm256_cmpeq_epi32(a, b);
411
+ #else
412
+ Packet4i a1, a2, b1, b2;
413
+ split_two(a, a1, a2);
414
+ split_two(b, b1, b2);
415
+ return combine_two(_mm_cmpeq_epi32(a1, b1), _mm_cmpeq_epi32(a2, b2));
416
+ #endif
417
+ }
418
+
419
+ template<>
420
+ EIGEN_STRONG_INLINE Packet8i psll<Packet8i>(const Packet8i& a, int b)
421
+ {
422
+ #ifdef EIGEN_VECTORIZE_AVX2
423
+ return _mm256_slli_epi32(a, b);
424
+ #else
425
+ Packet4i a1, a2;
426
+ split_two(a, a1, a2);
427
+ return combine_two(_mm_slli_epi32(a1, b), _mm_slli_epi32(a2, b));
428
+ #endif
429
+ }
430
+
431
+ template<>
432
+ EIGEN_STRONG_INLINE Packet8i psrl<Packet8i>(const Packet8i& a, int b)
433
+ {
434
+ #ifdef EIGEN_VECTORIZE_AVX2
435
+ return _mm256_srli_epi32(a, b);
436
+ #else
437
+ Packet4i a1, a2;
438
+ split_two(a, a1, a2);
439
+ return combine_two(_mm_srli_epi32(a1, b), _mm_srli_epi32(a2, b));
440
+ #endif
441
+ }
442
+
443
+ template<>
444
+ EIGEN_STRONG_INLINE Packet8i psll64<Packet8i>(const Packet8i& a, int b)
445
+ {
446
+ #ifdef EIGEN_VECTORIZE_AVX2
447
+ return _mm256_slli_epi64(a, b);
448
+ #else
449
+ Packet4i a1, a2;
450
+ split_two(a, a1, a2);
451
+ return combine_two(_mm_slli_epi64(a1, b), _mm_slli_epi64(a2, b));
452
+ #endif
453
+ }
454
+
455
+ template<>
456
+ EIGEN_STRONG_INLINE Packet8i psrl64<Packet8i>(const Packet8i& a, int b)
457
+ {
458
+ #ifdef EIGEN_VECTORIZE_AVX2
459
+ return _mm256_srli_epi64(a, b);
460
+ #else
461
+ Packet4i a1, a2;
462
+ split_two(a, a1, a2);
463
+ return combine_two(_mm_srli_epi64(a1, b), _mm_srli_epi64(a2, b));
464
+ #endif
465
+ }
466
+
467
+ template<> EIGEN_STRONG_INLINE Packet8i padd<Packet8i>(const Packet8i& a, const Packet8i& b)
468
+ {
469
+ #ifdef EIGEN_VECTORIZE_AVX2
470
+ return _mm256_add_epi32(a, b);
471
+ #else
472
+ Packet4i a1, a2, b1, b2;
473
+ split_two(a, a1, a2);
474
+ split_two(b, b1, b2);
475
+ return combine_two(_mm_add_epi32(a1, b1), _mm_add_epi32(a2, b2));
476
+ #endif
477
+ }
478
+
479
+ template<> EIGEN_STRONG_INLINE Packet8i psub<Packet8i>(const Packet8i& a, const Packet8i& b)
480
+ {
481
+ #ifdef EIGEN_VECTORIZE_AVX2
482
+ return _mm256_sub_epi32(a, b);
483
+ #else
484
+ Packet4i a1, a2, b1, b2;
485
+ split_two(a, a1, a2);
486
+ split_two(b, b1, b2);
487
+ return combine_two(_mm_sub_epi32(a1, b1), _mm_sub_epi32(a2, b2));
488
+ #endif
489
+ }
490
+
491
+ template<> EIGEN_STRONG_INLINE Packet8i pand<Packet8i>(const Packet8i& a, const Packet8i& b)
492
+ {
493
+ #ifdef EIGEN_VECTORIZE_AVX2
494
+ return _mm256_and_si256(a, b);
495
+ #else
496
+ return reinterpret_to_int(_mm256_and_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
497
+ #endif
498
+ }
499
+
500
+ template<> EIGEN_STRONG_INLINE Packet8i pandnot<Packet8i>(const Packet8i& a, const Packet8i& b)
501
+ {
502
+ #ifdef EIGEN_VECTORIZE_AVX2
503
+ return _mm256_andnot_si256(a, b);
504
+ #else
505
+ return reinterpret_to_int(_mm256_andnot_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
506
+ #endif
507
+ }
508
+
509
+ template<> EIGEN_STRONG_INLINE Packet8i por<Packet8i>(const Packet8i& a, const Packet8i& b)
510
+ {
511
+ #ifdef EIGEN_VECTORIZE_AVX2
512
+ return _mm256_or_si256(a, b);
513
+ #else
514
+ return reinterpret_to_int(_mm256_or_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
515
+ #endif
516
+ }
517
+
518
+ template<> EIGEN_STRONG_INLINE Packet8i pxor<Packet8i>(const Packet8i& a, const Packet8i& b)
519
+ {
520
+ #ifdef EIGEN_VECTORIZE_AVX2
521
+ return _mm256_xor_si256(a, b);
522
+ #else
523
+ return reinterpret_to_int(_mm256_xor_ps(reinterpret_to_float(a), reinterpret_to_float(b)));
524
+ #endif
525
+ }
526
+
527
+ template<>
528
+ EIGEN_STRONG_INLINE Packet8i pcmplt<Packet8i>(const Packet8i& a, const Packet8i& b)
529
+ {
530
+ return _mm256_cmpgt_epi32(b, a);
531
+ }
532
+
533
+ template<>
534
+ EIGEN_STRONG_INLINE Packet8f pcmplt<Packet8f>(const Packet8f& a, const Packet8f& b)
535
+ {
536
+ return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
537
+ }
538
+
539
+ template<>
540
+ EIGEN_STRONG_INLINE Packet8f pcmple<Packet8f>(const Packet8f& a, const Packet8f& b)
541
+ {
542
+ return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
543
+ }
544
+
545
+ template<>
546
+ EIGEN_STRONG_INLINE Packet4d pcmplt<Packet4d>(const Packet4d& a, const Packet4d& b)
547
+ {
548
+ return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
549
+ }
550
+
551
+ template<>
552
+ EIGEN_STRONG_INLINE Packet4d pcmple<Packet4d>(const Packet4d& a, const Packet4d& b)
553
+ {
554
+ return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
555
+ }
556
+
557
+ template<>
558
+ EIGEN_STRONG_INLINE Packet8f pblendv(const Packet8f& ifPacket, const Packet8f& thenPacket, const Packet8f& elsePacket)
559
+ {
560
+ return _mm256_blendv_ps(elsePacket, thenPacket, ifPacket);
561
+ }
562
+
563
+ template<>
564
+ EIGEN_STRONG_INLINE Packet8i pblendv(const Packet8i& ifPacket, const Packet8i& thenPacket, const Packet8i& elsePacket)
565
+ {
566
+ return _mm256_castps_si256(_mm256_blendv_ps(
567
+ _mm256_castsi256_ps(elsePacket),
568
+ _mm256_castsi256_ps(thenPacket),
569
+ _mm256_castsi256_ps(ifPacket)
570
+ ));
571
+ }
572
+
573
+ template<>
574
+ EIGEN_STRONG_INLINE Packet4d pblendv(const Packet4d& ifPacket, const Packet4d& thenPacket, const Packet4d& elsePacket)
575
+ {
576
+ return _mm256_blendv_pd(elsePacket, thenPacket, ifPacket);
577
+ }
578
+
579
+ template<>
580
+ EIGEN_STRONG_INLINE Packet8i pgather<Packet8i>(const int* addr, const Packet8i& index)
581
+ {
582
+ #ifdef EIGEN_VECTORIZE_AVX2
583
+ return _mm256_i32gather_epi32(addr, index, 4);
584
+ #else
585
+ uint32_t u[8];
586
+ _mm256_storeu_si256((Packet8i*)u, index);
587
+ return _mm256_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
588
+ addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
589
+ #endif
590
+ }
591
+
592
+ template<>
593
+ EIGEN_STRONG_INLINE Packet8f pgather<Packet8i>(const float *addr, const Packet8i& index)
594
+ {
595
+ #ifdef EIGEN_VECTORIZE_AVX2
596
+ return _mm256_i32gather_ps(addr, index, 4);
597
+ #else
598
+ uint32_t u[8];
599
+ _mm256_storeu_si256((Packet8i*)u, index);
600
+ return _mm256_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]],
601
+ addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
602
+ #endif
603
+ }
604
+
605
+ template<>
606
+ EIGEN_STRONG_INLINE Packet4d pgather<Packet8i>(const double *addr, const Packet8i& index, bool upperhalf)
607
+ {
608
+ #ifdef EIGEN_VECTORIZE_AVX2
609
+ return _mm256_i32gather_pd(addr, _mm256_castsi256_si128(index), 8);
610
+ #else
611
+ uint32_t u[8];
612
+ _mm256_storeu_si256((Packet8i*)u, index);
613
+ if (upperhalf)
614
+ {
615
+ return _mm256_setr_pd(addr[u[4]], addr[u[5]], addr[u[6]], addr[u[7]]);
616
+ }
617
+ else
618
+ {
619
+ return _mm256_setr_pd(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
620
+ }
621
+ #endif
622
+ }
623
+
624
+ template<>
625
+ EIGEN_STRONG_INLINE int pmovemask<Packet8f>(const Packet8f& a)
626
+ {
627
+ return _mm256_movemask_ps(a);
628
+ }
629
+
630
+ template<>
631
+ EIGEN_STRONG_INLINE int pmovemask<Packet4d>(const Packet4d& a)
632
+ {
633
+ return _mm256_movemask_pd(a);
634
+ }
635
+
636
+ template<>
637
+ EIGEN_STRONG_INLINE int pmovemask<Packet8i>(const Packet8i& a)
638
+ {
639
+ return pmovemask(_mm256_castsi256_ps(a));
640
+ }
641
+
642
+ template<>
643
+ EIGEN_STRONG_INLINE Packet8f ptruncate<Packet8f>(const Packet8f& a)
644
+ {
645
+ return _mm256_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
646
+ }
647
+
648
+ template<>
649
+ EIGEN_STRONG_INLINE Packet4d ptruncate<Packet4d>(const Packet4d& a)
650
+ {
651
+ return _mm256_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
652
+ }
653
+
654
+ template<>
655
+ EIGEN_STRONG_INLINE Packet8i pcmpeq64<Packet8i>(const Packet8i& a, const Packet8i& b)
656
+ {
657
+ #ifdef EIGEN_VECTORIZE_AVX2
658
+ return _mm256_cmpeq_epi64(a, b);
659
+ #else
660
+ Packet4i a1, a2, b1, b2;
661
+ split_two(a, a1, a2);
662
+ split_two(b, b1, b2);
663
+ return combine_two(_mm_cmpeq_epi64(a1, b1), _mm_cmpeq_epi64(a2, b2));
664
+ #endif
665
+ }
666
+
667
+ template<>
668
+ EIGEN_STRONG_INLINE Packet8i pmuluadd64<Packet8i>(const Packet8i& a, uint64_t b, uint64_t c)
669
+ {
670
+ uint64_t u[4];
671
+ _mm256_storeu_si256((__m256i*)u, a);
672
+ u[0] = u[0] * b + c;
673
+ u[1] = u[1] * b + c;
674
+ u[2] = u[2] * b + c;
675
+ u[3] = u[3] * b + c;
676
+ return _mm256_loadu_si256((__m256i*)u);
677
+ }
678
+ }
679
+ }
680
+ #endif
681
+
682
+ #ifdef EIGEN_VECTORIZE_SSE2
683
+ #include <xmmintrin.h>
684
+
685
+ namespace Eigen
686
+ {
687
+ namespace internal
688
+ {
689
+ template<>
690
+ struct reinterpreter<Packet4i>
691
+ {
692
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet4i& x)
693
+ {
694
+ return _mm_castsi128_ps(x);
695
+ }
696
+
697
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4i& x)
698
+ {
699
+ return _mm_castsi128_pd(x);
700
+ }
701
+
702
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet4i& x)
703
+ {
704
+ return x;
705
+ }
706
+ };
707
+
708
+ template<>
709
+ struct reinterpreter<Packet4f>
710
+ {
711
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet4f& x)
712
+ {
713
+ return x;
714
+ }
715
+
716
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet4f& x)
717
+ {
718
+ return _mm_castps_pd(x);
719
+ }
720
+
721
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet4f& x)
722
+ {
723
+ return _mm_castps_si128(x);
724
+ }
725
+ };
726
+
727
+ template<>
728
+ struct reinterpreter<Packet2d>
729
+ {
730
+ EIGEN_STRONG_INLINE Packet4f to_float(const Packet2d& x)
731
+ {
732
+ return _mm_castpd_ps(x);
733
+ }
734
+
735
+ EIGEN_STRONG_INLINE Packet2d to_double(const Packet2d& x)
736
+ {
737
+ return x;
738
+ }
739
+
740
+ EIGEN_STRONG_INLINE Packet4i to_int(const Packet2d& x)
741
+ {
742
+ return _mm_castpd_si128(x);
743
+ }
744
+ };
745
+
746
+ EIGEN_STRONG_INLINE void split_two(const Packet4i& x, uint64_t& a, uint64_t& b)
747
+ {
748
+ #ifdef EIGEN_VECTORIZE_SSE4_1
749
+ a = _mm_extract_epi64(x, 0);
750
+ b = _mm_extract_epi64(x, 1);
751
+ #else
752
+ uint64_t u[2];
753
+ _mm_storeu_si128((__m128i*)u, x);
754
+ a = u[0];
755
+ b = u[1];
756
+ #endif
757
+ }
758
+
759
+ EIGEN_STRONG_INLINE Packet4i combine_low32(const Packet4i& a, const Packet4i& b)
760
+ {
761
+ auto sa = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 1, 2, 0));
762
+ auto sb = _mm_shuffle_epi32(b, _MM_SHUFFLE(2, 0, 3, 1));
763
+ sa = _mm_and_si128(sa, _mm_setr_epi32(-1, -1, 0, 0));
764
+ sb = _mm_and_si128(sb, _mm_setr_epi32(0, 0, -1, -1));
765
+ return _mm_or_si128(sa, sb);
766
+ }
767
+
768
+ template<>
769
+ EIGEN_STRONG_INLINE Packet4i pseti64<Packet4i>(uint64_t a)
770
+ {
771
+ return _mm_set1_epi64x(a);
772
+ }
773
+
774
+ template<>
775
+ EIGEN_STRONG_INLINE Packet4i pcmpeq<Packet4i>(const Packet4i& a, const Packet4i& b)
776
+ {
777
+ return _mm_cmpeq_epi32(a, b);
778
+ }
779
+
780
+ template<>
781
+ EIGEN_STRONG_INLINE Packet4i psll<Packet4i>(const Packet4i& a, int b)
782
+ {
783
+ return _mm_slli_epi32(a, b);
784
+ }
785
+
786
+ template<>
787
+ EIGEN_STRONG_INLINE Packet4i psrl<Packet4i>(const Packet4i& a, int b)
788
+ {
789
+ return _mm_srli_epi32(a, b);
790
+ }
791
+
792
+
793
+ template<>
794
+ EIGEN_STRONG_INLINE Packet4i psll64<Packet4i>(const Packet4i& a, int b)
795
+ {
796
+ return _mm_slli_epi64(a, b);
797
+ }
798
+
799
+ template<>
800
+ EIGEN_STRONG_INLINE Packet4i psrl64<Packet4i>(const Packet4i& a, int b)
801
+ {
802
+ return _mm_srli_epi64(a, b);
803
+ }
804
+
805
+ template<>
806
+ EIGEN_STRONG_INLINE Packet4i pcmplt<Packet4i>(const Packet4i& a, const Packet4i& b)
807
+ {
808
+ return _mm_cmplt_epi32(a, b);
809
+ }
810
+
811
+ template<>
812
+ EIGEN_STRONG_INLINE Packet4f pcmplt<Packet4f>(const Packet4f& a, const Packet4f& b)
813
+ {
814
+ return _mm_cmplt_ps(a, b);
815
+ }
816
+
817
+ template<>
818
+ EIGEN_STRONG_INLINE Packet4f pcmple<Packet4f>(const Packet4f& a, const Packet4f& b)
819
+ {
820
+ return _mm_cmple_ps(a, b);
821
+ }
822
+
823
+ template<>
824
+ EIGEN_STRONG_INLINE Packet2d pcmplt<Packet2d>(const Packet2d& a, const Packet2d& b)
825
+ {
826
+ return _mm_cmplt_pd(a, b);
827
+ }
828
+
829
+ template<>
830
+ EIGEN_STRONG_INLINE Packet2d pcmple<Packet2d>(const Packet2d& a, const Packet2d& b)
831
+ {
832
+ return _mm_cmple_pd(a, b);
833
+ }
834
+
835
+ template<>
836
+ EIGEN_STRONG_INLINE Packet4f pblendv(const Packet4f& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket)
837
+ {
838
+ #ifdef EIGEN_VECTORIZE_SSE4_1
839
+ return _mm_blendv_ps(elsePacket, thenPacket, ifPacket);
840
+ #else
841
+ return _mm_or_ps(_mm_and_ps(ifPacket, thenPacket), _mm_andnot_ps(ifPacket, elsePacket));
842
+ #endif
843
+ }
844
+
845
+ template<>
846
+ EIGEN_STRONG_INLINE Packet4i pblendv(const Packet4i& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket)
847
+ {
848
+ #ifdef EIGEN_VECTORIZE_SSE4_1
849
+ return _mm_castps_si128(_mm_blendv_ps(_mm_castsi128_ps(elsePacket), _mm_castsi128_ps(thenPacket), _mm_castsi128_ps(ifPacket)));
850
+ #else
851
+ return _mm_or_si128(_mm_and_si128(ifPacket, thenPacket), _mm_andnot_si128(ifPacket, elsePacket));
852
+ #endif
853
+ }
854
+
855
+ template<>
856
+ EIGEN_STRONG_INLINE Packet2d pblendv(const Packet2d& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket)
857
+ {
858
+ #ifdef EIGEN_VECTORIZE_SSE4_1
859
+ return _mm_blendv_pd(elsePacket, thenPacket, ifPacket);
860
+ #else
861
+ return _mm_or_pd(_mm_and_pd(ifPacket, thenPacket), _mm_andnot_pd(ifPacket, elsePacket));
862
+ #endif
863
+ }
864
+
865
+ template<>
866
+ EIGEN_STRONG_INLINE Packet4i pgather<Packet4i>(const int* addr, const Packet4i& index)
867
+ {
868
+ #ifdef EIGEN_VECTORIZE_AVX2
869
+ return _mm_i32gather_epi32(addr, index, 4);
870
+ #else
871
+ uint32_t u[4];
872
+ _mm_storeu_si128((Packet4i*)u, index);
873
+ return _mm_setr_epi32(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
874
+ #endif
875
+ }
876
+
877
+ template<>
878
+ EIGEN_STRONG_INLINE Packet4f pgather<Packet4i>(const float* addr, const Packet4i& index)
879
+ {
880
+ #ifdef EIGEN_VECTORIZE_AVX2
881
+ return _mm_i32gather_ps(addr, index, 4);
882
+ #else
883
+ uint32_t u[4];
884
+ _mm_storeu_si128((Packet4i*)u, index);
885
+ return _mm_setr_ps(addr[u[0]], addr[u[1]], addr[u[2]], addr[u[3]]);
886
+ #endif
887
+ }
888
+
889
+ template<>
890
+ EIGEN_STRONG_INLINE Packet2d pgather<Packet4i>(const double* addr, const Packet4i& index, bool upperhalf)
891
+ {
892
+ #ifdef EIGEN_VECTORIZE_AVX2
893
+ return _mm_i32gather_pd(addr, index, 8);
894
+ #else
895
+ uint32_t u[4];
896
+ _mm_storeu_si128((Packet4i*)u, index);
897
+ if (upperhalf)
898
+ {
899
+ return _mm_setr_pd(addr[u[2]], addr[u[3]]);
900
+ }
901
+ else
902
+ {
903
+ return _mm_setr_pd(addr[u[0]], addr[u[1]]);
904
+ }
905
+ #endif
906
+ }
907
+
908
+ template<>
909
+ EIGEN_STRONG_INLINE int pmovemask<Packet4f>(const Packet4f& a)
910
+ {
911
+ return _mm_movemask_ps(a);
912
+ }
913
+
914
+ template<>
915
+ EIGEN_STRONG_INLINE int pmovemask<Packet2d>(const Packet2d& a)
916
+ {
917
+ return _mm_movemask_pd(a);
918
+ }
919
+
920
+ template<>
921
+ EIGEN_STRONG_INLINE int pmovemask<Packet4i>(const Packet4i& a)
922
+ {
923
+ return pmovemask(_mm_castsi128_ps(a));
924
+ }
925
+
926
+ template<>
927
+ EIGEN_STRONG_INLINE Packet4f ptruncate<Packet4f>(const Packet4f& a)
928
+ {
929
+ #ifdef EIGEN_VECTORIZE_SSE4_1
930
+ return _mm_round_ps(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
931
+ #else
932
+ auto round = _MM_GET_ROUNDING_MODE();
933
+ _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
934
+ auto ret = _mm_cvtepi32_ps(_mm_cvtps_epi32(a));
935
+ _MM_SET_ROUNDING_MODE(round);
936
+ return ret;
937
+ #endif
938
+ }
939
+
940
+ template<>
941
+ EIGEN_STRONG_INLINE Packet2d ptruncate<Packet2d>(const Packet2d& a)
942
+ {
943
+ #ifdef EIGEN_VECTORIZE_SSE4_1
944
+ return _mm_round_pd(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
945
+ #else
946
+ auto round = _MM_GET_ROUNDING_MODE();
947
+ _MM_SET_ROUNDING_MODE(_MM_ROUND_TOWARD_ZERO);
948
+ auto ret = _mm_cvtepi32_pd(_mm_cvtpd_epi32(a));
949
+ _MM_SET_ROUNDING_MODE(round);
950
+ return ret;
951
+ #endif
952
+ }
953
+
954
+ template<>
955
+ EIGEN_STRONG_INLINE Packet4i pcmpeq64<Packet4i>(const Packet4i& a, const Packet4i& b)
956
+ {
957
+ #ifdef EIGEN_VECTORIZE_SSE4_1
958
+ return _mm_cmpeq_epi64(a, b);
959
+ #else
960
+ Packet4i c = _mm_cmpeq_epi32(a, b);
961
+ return pand(c, _mm_shuffle_epi32(c, _MM_SHUFFLE(2, 3, 0, 1)));
962
+ #endif
963
+ }
964
+
965
+ template<>
966
+ EIGEN_STRONG_INLINE Packet4i pmuluadd64<Packet4i>(const Packet4i& a, uint64_t b, uint64_t c)
967
+ {
968
+ uint64_t u[2];
969
+ _mm_storeu_si128((__m128i*)u, a);
970
+ u[0] = u[0] * b + c;
971
+ u[1] = u[1] * b + c;
972
+ return _mm_loadu_si128((__m128i*)u);
973
+ }
974
+ }
975
+ }
976
+ #endif
977
+
978
+ #endif