umappp 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (395) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE.txt +25 -0
  3. data/README.md +110 -0
  4. data/ext/umappp/extconf.rb +25 -0
  5. data/ext/umappp/numo.hpp +867 -0
  6. data/ext/umappp/umappp.cpp +225 -0
  7. data/lib/umappp/version.rb +5 -0
  8. data/lib/umappp.rb +41 -0
  9. data/vendor/Eigen/Cholesky +45 -0
  10. data/vendor/Eigen/CholmodSupport +48 -0
  11. data/vendor/Eigen/Core +384 -0
  12. data/vendor/Eigen/Dense +7 -0
  13. data/vendor/Eigen/Eigen +2 -0
  14. data/vendor/Eigen/Eigenvalues +60 -0
  15. data/vendor/Eigen/Geometry +59 -0
  16. data/vendor/Eigen/Householder +29 -0
  17. data/vendor/Eigen/IterativeLinearSolvers +48 -0
  18. data/vendor/Eigen/Jacobi +32 -0
  19. data/vendor/Eigen/KLUSupport +41 -0
  20. data/vendor/Eigen/LU +47 -0
  21. data/vendor/Eigen/MetisSupport +35 -0
  22. data/vendor/Eigen/OrderingMethods +70 -0
  23. data/vendor/Eigen/PaStiXSupport +49 -0
  24. data/vendor/Eigen/PardisoSupport +35 -0
  25. data/vendor/Eigen/QR +50 -0
  26. data/vendor/Eigen/QtAlignedMalloc +39 -0
  27. data/vendor/Eigen/SPQRSupport +34 -0
  28. data/vendor/Eigen/SVD +50 -0
  29. data/vendor/Eigen/Sparse +34 -0
  30. data/vendor/Eigen/SparseCholesky +37 -0
  31. data/vendor/Eigen/SparseCore +69 -0
  32. data/vendor/Eigen/SparseLU +50 -0
  33. data/vendor/Eigen/SparseQR +36 -0
  34. data/vendor/Eigen/StdDeque +27 -0
  35. data/vendor/Eigen/StdList +26 -0
  36. data/vendor/Eigen/StdVector +27 -0
  37. data/vendor/Eigen/SuperLUSupport +64 -0
  38. data/vendor/Eigen/UmfPackSupport +40 -0
  39. data/vendor/Eigen/src/Cholesky/LDLT.h +688 -0
  40. data/vendor/Eigen/src/Cholesky/LLT.h +558 -0
  41. data/vendor/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
  42. data/vendor/Eigen/src/CholmodSupport/CholmodSupport.h +682 -0
  43. data/vendor/Eigen/src/Core/ArithmeticSequence.h +413 -0
  44. data/vendor/Eigen/src/Core/Array.h +417 -0
  45. data/vendor/Eigen/src/Core/ArrayBase.h +226 -0
  46. data/vendor/Eigen/src/Core/ArrayWrapper.h +209 -0
  47. data/vendor/Eigen/src/Core/Assign.h +90 -0
  48. data/vendor/Eigen/src/Core/AssignEvaluator.h +1010 -0
  49. data/vendor/Eigen/src/Core/Assign_MKL.h +178 -0
  50. data/vendor/Eigen/src/Core/BandMatrix.h +353 -0
  51. data/vendor/Eigen/src/Core/Block.h +448 -0
  52. data/vendor/Eigen/src/Core/BooleanRedux.h +162 -0
  53. data/vendor/Eigen/src/Core/CommaInitializer.h +164 -0
  54. data/vendor/Eigen/src/Core/ConditionEstimator.h +175 -0
  55. data/vendor/Eigen/src/Core/CoreEvaluators.h +1741 -0
  56. data/vendor/Eigen/src/Core/CoreIterators.h +132 -0
  57. data/vendor/Eigen/src/Core/CwiseBinaryOp.h +183 -0
  58. data/vendor/Eigen/src/Core/CwiseNullaryOp.h +1001 -0
  59. data/vendor/Eigen/src/Core/CwiseTernaryOp.h +197 -0
  60. data/vendor/Eigen/src/Core/CwiseUnaryOp.h +103 -0
  61. data/vendor/Eigen/src/Core/CwiseUnaryView.h +132 -0
  62. data/vendor/Eigen/src/Core/DenseBase.h +701 -0
  63. data/vendor/Eigen/src/Core/DenseCoeffsBase.h +685 -0
  64. data/vendor/Eigen/src/Core/DenseStorage.h +652 -0
  65. data/vendor/Eigen/src/Core/Diagonal.h +258 -0
  66. data/vendor/Eigen/src/Core/DiagonalMatrix.h +391 -0
  67. data/vendor/Eigen/src/Core/DiagonalProduct.h +28 -0
  68. data/vendor/Eigen/src/Core/Dot.h +318 -0
  69. data/vendor/Eigen/src/Core/EigenBase.h +160 -0
  70. data/vendor/Eigen/src/Core/ForceAlignedAccess.h +150 -0
  71. data/vendor/Eigen/src/Core/Fuzzy.h +155 -0
  72. data/vendor/Eigen/src/Core/GeneralProduct.h +465 -0
  73. data/vendor/Eigen/src/Core/GenericPacketMath.h +1040 -0
  74. data/vendor/Eigen/src/Core/GlobalFunctions.h +194 -0
  75. data/vendor/Eigen/src/Core/IO.h +258 -0
  76. data/vendor/Eigen/src/Core/IndexedView.h +237 -0
  77. data/vendor/Eigen/src/Core/Inverse.h +117 -0
  78. data/vendor/Eigen/src/Core/Map.h +171 -0
  79. data/vendor/Eigen/src/Core/MapBase.h +310 -0
  80. data/vendor/Eigen/src/Core/MathFunctions.h +2057 -0
  81. data/vendor/Eigen/src/Core/MathFunctionsImpl.h +200 -0
  82. data/vendor/Eigen/src/Core/Matrix.h +565 -0
  83. data/vendor/Eigen/src/Core/MatrixBase.h +547 -0
  84. data/vendor/Eigen/src/Core/NestByValue.h +85 -0
  85. data/vendor/Eigen/src/Core/NoAlias.h +109 -0
  86. data/vendor/Eigen/src/Core/NumTraits.h +335 -0
  87. data/vendor/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  88. data/vendor/Eigen/src/Core/PermutationMatrix.h +605 -0
  89. data/vendor/Eigen/src/Core/PlainObjectBase.h +1128 -0
  90. data/vendor/Eigen/src/Core/Product.h +191 -0
  91. data/vendor/Eigen/src/Core/ProductEvaluators.h +1179 -0
  92. data/vendor/Eigen/src/Core/Random.h +218 -0
  93. data/vendor/Eigen/src/Core/Redux.h +515 -0
  94. data/vendor/Eigen/src/Core/Ref.h +381 -0
  95. data/vendor/Eigen/src/Core/Replicate.h +142 -0
  96. data/vendor/Eigen/src/Core/Reshaped.h +454 -0
  97. data/vendor/Eigen/src/Core/ReturnByValue.h +119 -0
  98. data/vendor/Eigen/src/Core/Reverse.h +217 -0
  99. data/vendor/Eigen/src/Core/Select.h +164 -0
  100. data/vendor/Eigen/src/Core/SelfAdjointView.h +365 -0
  101. data/vendor/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
  102. data/vendor/Eigen/src/Core/Solve.h +188 -0
  103. data/vendor/Eigen/src/Core/SolveTriangular.h +235 -0
  104. data/vendor/Eigen/src/Core/SolverBase.h +168 -0
  105. data/vendor/Eigen/src/Core/StableNorm.h +251 -0
  106. data/vendor/Eigen/src/Core/StlIterators.h +463 -0
  107. data/vendor/Eigen/src/Core/Stride.h +116 -0
  108. data/vendor/Eigen/src/Core/Swap.h +68 -0
  109. data/vendor/Eigen/src/Core/Transpose.h +464 -0
  110. data/vendor/Eigen/src/Core/Transpositions.h +386 -0
  111. data/vendor/Eigen/src/Core/TriangularMatrix.h +1001 -0
  112. data/vendor/Eigen/src/Core/VectorBlock.h +96 -0
  113. data/vendor/Eigen/src/Core/VectorwiseOp.h +784 -0
  114. data/vendor/Eigen/src/Core/Visitor.h +381 -0
  115. data/vendor/Eigen/src/Core/arch/AVX/Complex.h +372 -0
  116. data/vendor/Eigen/src/Core/arch/AVX/MathFunctions.h +228 -0
  117. data/vendor/Eigen/src/Core/arch/AVX/PacketMath.h +1574 -0
  118. data/vendor/Eigen/src/Core/arch/AVX/TypeCasting.h +115 -0
  119. data/vendor/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  120. data/vendor/Eigen/src/Core/arch/AVX512/MathFunctions.h +362 -0
  121. data/vendor/Eigen/src/Core/arch/AVX512/PacketMath.h +2303 -0
  122. data/vendor/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  123. data/vendor/Eigen/src/Core/arch/AltiVec/Complex.h +417 -0
  124. data/vendor/Eigen/src/Core/arch/AltiVec/MathFunctions.h +90 -0
  125. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  126. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  127. data/vendor/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  128. data/vendor/Eigen/src/Core/arch/AltiVec/PacketMath.h +2711 -0
  129. data/vendor/Eigen/src/Core/arch/CUDA/Complex.h +258 -0
  130. data/vendor/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  131. data/vendor/Eigen/src/Core/arch/Default/ConjHelper.h +117 -0
  132. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  133. data/vendor/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  134. data/vendor/Eigen/src/Core/arch/Default/Half.h +942 -0
  135. data/vendor/Eigen/src/Core/arch/Default/Settings.h +49 -0
  136. data/vendor/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  137. data/vendor/Eigen/src/Core/arch/GPU/MathFunctions.h +103 -0
  138. data/vendor/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  139. data/vendor/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  140. data/vendor/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  141. data/vendor/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  142. data/vendor/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  143. data/vendor/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  144. data/vendor/Eigen/src/Core/arch/NEON/Complex.h +584 -0
  145. data/vendor/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  146. data/vendor/Eigen/src/Core/arch/NEON/MathFunctions.h +75 -0
  147. data/vendor/Eigen/src/Core/arch/NEON/PacketMath.h +4587 -0
  148. data/vendor/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  149. data/vendor/Eigen/src/Core/arch/SSE/Complex.h +351 -0
  150. data/vendor/Eigen/src/Core/arch/SSE/MathFunctions.h +199 -0
  151. data/vendor/Eigen/src/Core/arch/SSE/PacketMath.h +1505 -0
  152. data/vendor/Eigen/src/Core/arch/SSE/TypeCasting.h +142 -0
  153. data/vendor/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  154. data/vendor/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  155. data/vendor/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  156. data/vendor/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  157. data/vendor/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  158. data/vendor/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  159. data/vendor/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  160. data/vendor/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  161. data/vendor/Eigen/src/Core/arch/ZVector/Complex.h +426 -0
  162. data/vendor/Eigen/src/Core/arch/ZVector/MathFunctions.h +233 -0
  163. data/vendor/Eigen/src/Core/arch/ZVector/PacketMath.h +1060 -0
  164. data/vendor/Eigen/src/Core/functors/AssignmentFunctors.h +177 -0
  165. data/vendor/Eigen/src/Core/functors/BinaryFunctors.h +541 -0
  166. data/vendor/Eigen/src/Core/functors/NullaryFunctors.h +189 -0
  167. data/vendor/Eigen/src/Core/functors/StlFunctors.h +166 -0
  168. data/vendor/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
  169. data/vendor/Eigen/src/Core/functors/UnaryFunctors.h +1131 -0
  170. data/vendor/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2645 -0
  171. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix.h +517 -0
  172. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +317 -0
  173. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
  174. data/vendor/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +124 -0
  175. data/vendor/Eigen/src/Core/products/GeneralMatrixVector.h +518 -0
  176. data/vendor/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
  177. data/vendor/Eigen/src/Core/products/Parallelizer.h +180 -0
  178. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +544 -0
  179. data/vendor/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +295 -0
  180. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector.h +262 -0
  181. data/vendor/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
  182. data/vendor/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
  183. data/vendor/Eigen/src/Core/products/SelfadjointRank2Update.h +94 -0
  184. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix.h +472 -0
  185. data/vendor/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +317 -0
  186. data/vendor/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
  187. data/vendor/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
  188. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix.h +337 -0
  189. data/vendor/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +167 -0
  190. data/vendor/Eigen/src/Core/products/TriangularSolverVector.h +148 -0
  191. data/vendor/Eigen/src/Core/util/BlasUtil.h +583 -0
  192. data/vendor/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  193. data/vendor/Eigen/src/Core/util/Constants.h +563 -0
  194. data/vendor/Eigen/src/Core/util/DisableStupidWarnings.h +106 -0
  195. data/vendor/Eigen/src/Core/util/ForwardDeclarations.h +322 -0
  196. data/vendor/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  197. data/vendor/Eigen/src/Core/util/IntegralConstant.h +272 -0
  198. data/vendor/Eigen/src/Core/util/MKL_support.h +137 -0
  199. data/vendor/Eigen/src/Core/util/Macros.h +1464 -0
  200. data/vendor/Eigen/src/Core/util/Memory.h +1163 -0
  201. data/vendor/Eigen/src/Core/util/Meta.h +812 -0
  202. data/vendor/Eigen/src/Core/util/NonMPL2.h +3 -0
  203. data/vendor/Eigen/src/Core/util/ReenableStupidWarnings.h +31 -0
  204. data/vendor/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  205. data/vendor/Eigen/src/Core/util/StaticAssert.h +221 -0
  206. data/vendor/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  207. data/vendor/Eigen/src/Core/util/XprHelper.h +856 -0
  208. data/vendor/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
  209. data/vendor/Eigen/src/Eigenvalues/ComplexSchur.h +462 -0
  210. data/vendor/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
  211. data/vendor/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
  212. data/vendor/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
  213. data/vendor/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
  214. data/vendor/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
  215. data/vendor/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
  216. data/vendor/Eigen/src/Eigenvalues/RealQZ.h +657 -0
  217. data/vendor/Eigen/src/Eigenvalues/RealSchur.h +558 -0
  218. data/vendor/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
  219. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +904 -0
  220. data/vendor/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
  221. data/vendor/Eigen/src/Eigenvalues/Tridiagonalization.h +561 -0
  222. data/vendor/Eigen/src/Geometry/AlignedBox.h +486 -0
  223. data/vendor/Eigen/src/Geometry/AngleAxis.h +247 -0
  224. data/vendor/Eigen/src/Geometry/EulerAngles.h +114 -0
  225. data/vendor/Eigen/src/Geometry/Homogeneous.h +501 -0
  226. data/vendor/Eigen/src/Geometry/Hyperplane.h +282 -0
  227. data/vendor/Eigen/src/Geometry/OrthoMethods.h +235 -0
  228. data/vendor/Eigen/src/Geometry/ParametrizedLine.h +232 -0
  229. data/vendor/Eigen/src/Geometry/Quaternion.h +870 -0
  230. data/vendor/Eigen/src/Geometry/Rotation2D.h +199 -0
  231. data/vendor/Eigen/src/Geometry/RotationBase.h +206 -0
  232. data/vendor/Eigen/src/Geometry/Scaling.h +188 -0
  233. data/vendor/Eigen/src/Geometry/Transform.h +1563 -0
  234. data/vendor/Eigen/src/Geometry/Translation.h +202 -0
  235. data/vendor/Eigen/src/Geometry/Umeyama.h +166 -0
  236. data/vendor/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  237. data/vendor/Eigen/src/Householder/BlockHouseholder.h +110 -0
  238. data/vendor/Eigen/src/Householder/Householder.h +176 -0
  239. data/vendor/Eigen/src/Householder/HouseholderSequence.h +545 -0
  240. data/vendor/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
  241. data/vendor/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +212 -0
  242. data/vendor/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +229 -0
  243. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +394 -0
  244. data/vendor/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +453 -0
  245. data/vendor/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +444 -0
  246. data/vendor/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +198 -0
  247. data/vendor/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +117 -0
  248. data/vendor/Eigen/src/Jacobi/Jacobi.h +483 -0
  249. data/vendor/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  250. data/vendor/Eigen/src/LU/Determinant.h +117 -0
  251. data/vendor/Eigen/src/LU/FullPivLU.h +877 -0
  252. data/vendor/Eigen/src/LU/InverseImpl.h +432 -0
  253. data/vendor/Eigen/src/LU/PartialPivLU.h +624 -0
  254. data/vendor/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
  255. data/vendor/Eigen/src/LU/arch/InverseSize4.h +351 -0
  256. data/vendor/Eigen/src/MetisSupport/MetisSupport.h +137 -0
  257. data/vendor/Eigen/src/OrderingMethods/Amd.h +435 -0
  258. data/vendor/Eigen/src/OrderingMethods/Eigen_Colamd.h +1863 -0
  259. data/vendor/Eigen/src/OrderingMethods/Ordering.h +153 -0
  260. data/vendor/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
  261. data/vendor/Eigen/src/PardisoSupport/PardisoSupport.h +545 -0
  262. data/vendor/Eigen/src/QR/ColPivHouseholderQR.h +674 -0
  263. data/vendor/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
  264. data/vendor/Eigen/src/QR/CompleteOrthogonalDecomposition.h +635 -0
  265. data/vendor/Eigen/src/QR/FullPivHouseholderQR.h +713 -0
  266. data/vendor/Eigen/src/QR/HouseholderQR.h +434 -0
  267. data/vendor/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
  268. data/vendor/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +335 -0
  269. data/vendor/Eigen/src/SVD/BDCSVD.h +1366 -0
  270. data/vendor/Eigen/src/SVD/JacobiSVD.h +812 -0
  271. data/vendor/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
  272. data/vendor/Eigen/src/SVD/SVDBase.h +376 -0
  273. data/vendor/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
  274. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky.h +697 -0
  275. data/vendor/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +174 -0
  276. data/vendor/Eigen/src/SparseCore/AmbiVector.h +378 -0
  277. data/vendor/Eigen/src/SparseCore/CompressedStorage.h +274 -0
  278. data/vendor/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
  279. data/vendor/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
  280. data/vendor/Eigen/src/SparseCore/SparseAssign.h +270 -0
  281. data/vendor/Eigen/src/SparseCore/SparseBlock.h +571 -0
  282. data/vendor/Eigen/src/SparseCore/SparseColEtree.h +206 -0
  283. data/vendor/Eigen/src/SparseCore/SparseCompressedBase.h +370 -0
  284. data/vendor/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +722 -0
  285. data/vendor/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +150 -0
  286. data/vendor/Eigen/src/SparseCore/SparseDenseProduct.h +342 -0
  287. data/vendor/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
  288. data/vendor/Eigen/src/SparseCore/SparseDot.h +98 -0
  289. data/vendor/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
  290. data/vendor/Eigen/src/SparseCore/SparseMap.h +305 -0
  291. data/vendor/Eigen/src/SparseCore/SparseMatrix.h +1518 -0
  292. data/vendor/Eigen/src/SparseCore/SparseMatrixBase.h +398 -0
  293. data/vendor/Eigen/src/SparseCore/SparsePermutation.h +178 -0
  294. data/vendor/Eigen/src/SparseCore/SparseProduct.h +181 -0
  295. data/vendor/Eigen/src/SparseCore/SparseRedux.h +49 -0
  296. data/vendor/Eigen/src/SparseCore/SparseRef.h +397 -0
  297. data/vendor/Eigen/src/SparseCore/SparseSelfAdjointView.h +659 -0
  298. data/vendor/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
  299. data/vendor/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
  300. data/vendor/Eigen/src/SparseCore/SparseTranspose.h +92 -0
  301. data/vendor/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
  302. data/vendor/Eigen/src/SparseCore/SparseUtil.h +186 -0
  303. data/vendor/Eigen/src/SparseCore/SparseVector.h +478 -0
  304. data/vendor/Eigen/src/SparseCore/SparseView.h +254 -0
  305. data/vendor/Eigen/src/SparseCore/TriangularSolver.h +315 -0
  306. data/vendor/Eigen/src/SparseLU/SparseLU.h +923 -0
  307. data/vendor/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
  308. data/vendor/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
  309. data/vendor/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
  310. data/vendor/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +375 -0
  311. data/vendor/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
  312. data/vendor/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
  313. data/vendor/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
  314. data/vendor/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
  315. data/vendor/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
  316. data/vendor/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
  317. data/vendor/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
  318. data/vendor/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
  319. data/vendor/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
  320. data/vendor/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
  321. data/vendor/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
  322. data/vendor/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
  323. data/vendor/Eigen/src/SparseQR/SparseQR.h +758 -0
  324. data/vendor/Eigen/src/StlSupport/StdDeque.h +116 -0
  325. data/vendor/Eigen/src/StlSupport/StdList.h +106 -0
  326. data/vendor/Eigen/src/StlSupport/StdVector.h +131 -0
  327. data/vendor/Eigen/src/StlSupport/details.h +84 -0
  328. data/vendor/Eigen/src/SuperLUSupport/SuperLUSupport.h +1025 -0
  329. data/vendor/Eigen/src/UmfPackSupport/UmfPackSupport.h +642 -0
  330. data/vendor/Eigen/src/misc/Image.h +82 -0
  331. data/vendor/Eigen/src/misc/Kernel.h +79 -0
  332. data/vendor/Eigen/src/misc/RealSvd2x2.h +55 -0
  333. data/vendor/Eigen/src/misc/blas.h +440 -0
  334. data/vendor/Eigen/src/misc/lapack.h +152 -0
  335. data/vendor/Eigen/src/misc/lapacke.h +16292 -0
  336. data/vendor/Eigen/src/misc/lapacke_mangling.h +17 -0
  337. data/vendor/Eigen/src/plugins/ArrayCwiseBinaryOps.h +358 -0
  338. data/vendor/Eigen/src/plugins/ArrayCwiseUnaryOps.h +696 -0
  339. data/vendor/Eigen/src/plugins/BlockMethods.h +1442 -0
  340. data/vendor/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
  341. data/vendor/Eigen/src/plugins/CommonCwiseUnaryOps.h +177 -0
  342. data/vendor/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  343. data/vendor/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
  344. data/vendor/Eigen/src/plugins/MatrixCwiseUnaryOps.h +95 -0
  345. data/vendor/Eigen/src/plugins/ReshapedMethods.h +149 -0
  346. data/vendor/aarand/aarand.hpp +114 -0
  347. data/vendor/annoy/annoylib.h +1495 -0
  348. data/vendor/annoy/kissrandom.h +120 -0
  349. data/vendor/annoy/mman.h +242 -0
  350. data/vendor/hnswlib/bruteforce.h +152 -0
  351. data/vendor/hnswlib/hnswalg.h +1192 -0
  352. data/vendor/hnswlib/hnswlib.h +108 -0
  353. data/vendor/hnswlib/space_ip.h +282 -0
  354. data/vendor/hnswlib/space_l2.h +281 -0
  355. data/vendor/hnswlib/visited_list_pool.h +79 -0
  356. data/vendor/irlba/irlba.hpp +575 -0
  357. data/vendor/irlba/lanczos.hpp +212 -0
  358. data/vendor/irlba/parallel.hpp +474 -0
  359. data/vendor/irlba/utils.hpp +224 -0
  360. data/vendor/irlba/wrappers.hpp +228 -0
  361. data/vendor/kmeans/Base.hpp +75 -0
  362. data/vendor/kmeans/Details.hpp +79 -0
  363. data/vendor/kmeans/HartiganWong.hpp +492 -0
  364. data/vendor/kmeans/InitializeKmeansPP.hpp +144 -0
  365. data/vendor/kmeans/InitializeNone.hpp +44 -0
  366. data/vendor/kmeans/InitializePCAPartition.hpp +309 -0
  367. data/vendor/kmeans/InitializeRandom.hpp +91 -0
  368. data/vendor/kmeans/Kmeans.hpp +161 -0
  369. data/vendor/kmeans/Lloyd.hpp +134 -0
  370. data/vendor/kmeans/MiniBatch.hpp +269 -0
  371. data/vendor/kmeans/QuickSearch.hpp +179 -0
  372. data/vendor/kmeans/compute_centroids.hpp +32 -0
  373. data/vendor/kmeans/compute_wcss.hpp +27 -0
  374. data/vendor/kmeans/is_edge_case.hpp +42 -0
  375. data/vendor/kmeans/random.hpp +55 -0
  376. data/vendor/knncolle/Annoy/Annoy.hpp +193 -0
  377. data/vendor/knncolle/BruteForce/BruteForce.hpp +120 -0
  378. data/vendor/knncolle/Hnsw/Hnsw.hpp +225 -0
  379. data/vendor/knncolle/Kmknn/Kmknn.hpp +286 -0
  380. data/vendor/knncolle/VpTree/VpTree.hpp +256 -0
  381. data/vendor/knncolle/knncolle.hpp +34 -0
  382. data/vendor/knncolle/utils/Base.hpp +100 -0
  383. data/vendor/knncolle/utils/NeighborQueue.hpp +94 -0
  384. data/vendor/knncolle/utils/distances.hpp +98 -0
  385. data/vendor/knncolle/utils/find_nearest_neighbors.hpp +112 -0
  386. data/vendor/powerit/PowerIterations.hpp +157 -0
  387. data/vendor/umappp/NeighborList.hpp +37 -0
  388. data/vendor/umappp/Umap.hpp +662 -0
  389. data/vendor/umappp/combine_neighbor_sets.hpp +95 -0
  390. data/vendor/umappp/find_ab.hpp +157 -0
  391. data/vendor/umappp/neighbor_similarities.hpp +136 -0
  392. data/vendor/umappp/optimize_layout.hpp +285 -0
  393. data/vendor/umappp/spectral_init.hpp +181 -0
  394. data/vendor/umappp/umappp.hpp +13 -0
  395. metadata +465 -0
@@ -0,0 +1,1060 @@
1
+ // This file is part of Eigen, a lightweight C++ template library
2
+ // for linear algebra.
3
+ //
4
+ // Copyright (C) 2016 Konstantinos Margaritis <markos@freevec.org>
5
+ //
6
+ // This Source Code Form is subject to the terms of the Mozilla
7
+ // Public License v. 2.0. If a copy of the MPL was not distributed
8
+ // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
9
+
10
+ #ifndef EIGEN_PACKET_MATH_ZVECTOR_H
11
+ #define EIGEN_PACKET_MATH_ZVECTOR_H
12
+
13
+ namespace Eigen {
14
+
15
+ namespace internal {
16
+
17
+ #ifndef EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD // keep any value that was defined before this header
18
+ #define EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD 16 // fallback value used when nothing was supplied
19
+ #endif
20
+
21
+ #ifndef EIGEN_HAS_SINGLE_INSTRUCTION_MADD
22
+ #define EIGEN_HAS_SINGLE_INSTRUCTION_MADD // presence flag with no value; presumably advertises a fused multiply-add -- confirm against the code that tests it
23
+ #endif
24
+
25
+ #ifndef EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS
26
+ #define EIGEN_ARCH_DEFAULT_NUMBER_OF_REGISTERS 32 // default register count for this backend unless overridden earlier
27
+ #endif
28
+
29
+ typedef __vector int Packet4i; // vector of int lanes (4 per vector, cf. Packet::i[4])
30
+ typedef __vector unsigned int Packet4ui; // vector of unsigned int lanes (cf. Packet::ui[4])
31
+ typedef __vector __bool int Packet4bi; // boolean-int vector; presumably the mask type of lane-wise comparisons -- confirm
32
+ typedef __vector short int Packet8i; // note: despite the "i" suffix the element type is short int
33
+ typedef __vector unsigned char Packet16uc; // byte vector; used in this file for the p16uc_* byte-index masks
34
+ typedef __vector double Packet2d; // vector of double lanes (cf. Packet::d[2])
35
+ typedef __vector unsigned long long Packet2ul; // vector of unsigned long long lanes (cf. Packet::ul[2])
36
+ typedef __vector long long Packet2l; // vector of long long lanes (cf. Packet::l[2])
37
+
38
+ // Z14 has builtin support for float vectors.
39
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) // taken when __ARCH__ is undefined or at least 12
40
+ typedef __vector float Packet4f; // native float vector
41
+ #else
42
+ typedef struct { // fallback: Packet4f is a plain struct, not a vector type
43
+ Packet2d v4f[2]; // two double vectors stand in for the four floats; presumably each float is widened to double -- confirm
44
+ } Packet4f;
45
+ #endif
46
+
47
+ typedef union { // overlays scalar arrays and vector types on the same storage so single lanes can be read or written
48
+ numext::int32_t i[4]; // scalar view matching v4i
49
+ numext::uint32_t ui[4]; // scalar view matching v4ui
50
+ numext::int64_t l[2]; // scalar view matching v2l
51
+ numext::uint64_t ul[2]; // scalar view matching v2ul
52
+ double d[2]; // scalar view matching v2d
53
+ float f[4]; // scalar view matching v4f (when that member exists)
54
+ Packet4i v4i;
55
+ Packet4ui v4ui;
56
+ Packet2l v2l;
57
+ Packet2ul v2ul;
58
+ Packet2d v2d;
59
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) // same condition that makes Packet4f a native vector type
60
+ Packet4f v4f; // only present when Packet4f is a native float vector
61
+ #endif
62
+ } Packet;
63
+
64
+ // We don't want to write the same code all the time, but we need to reuse the constants
65
+ // and it doesn't really work to declare them global, so we define macros instead
66
+
67
+ #define _EIGEN_DECLARE_CONST_FAST_Packet4i(NAME,X) \
68
+ Packet4i p4i_##NAME = reinterpret_cast<Packet4i>(vec_splat_s32(X)) // declares p4i_<NAME>, built with vec_splat_s32(X)
69
+
70
+ #define _EIGEN_DECLARE_CONST_FAST_Packet2d(NAME,X) \
71
+ Packet2d p2d_##NAME = reinterpret_cast<Packet2d>(vec_splat_s64(X)) // declares p2d_<NAME>; X is an integer splat whose bits are reinterpreted, not a double value
72
+
73
+ #define _EIGEN_DECLARE_CONST_FAST_Packet2l(NAME,X) \
74
+ Packet2l p2l_##NAME = reinterpret_cast<Packet2l>(vec_splat_s64(X)) // declares p2l_<NAME>, built with vec_splat_s64(X)
75
+
76
+ #define _EIGEN_DECLARE_CONST_Packet4i(NAME,X) \
77
+ Packet4i p4i_##NAME = pset1<Packet4i>(X) // declares p4i_<NAME> via pset1 (non-"FAST" variant)
78
+
79
+ #define _EIGEN_DECLARE_CONST_Packet2d(NAME,X) \
80
+ Packet2d p2d_##NAME = pset1<Packet2d>(X) // declares p2d_<NAME> via pset1
81
+
82
+ #define _EIGEN_DECLARE_CONST_Packet2l(NAME,X) \
83
+ Packet2l p2l_##NAME = pset1<Packet2l>(X) // declares p2l_<NAME> via pset1
84
+
85
+ // These constants are endian-agnostic
86
+ static _EIGEN_DECLARE_CONST_FAST_Packet4i(ZERO, 0); // p4i_ZERO = { 0, 0, 0, 0 }
87
+ static _EIGEN_DECLARE_CONST_FAST_Packet4i(ONE, 1); // p4i_ONE = { 1, 1, 1, 1 }
88
+
89
+ static _EIGEN_DECLARE_CONST_FAST_Packet2d(ZERO, 0); // p2d_ZERO: an all-zero bit pattern reinterpreted as double lanes
90
+ static _EIGEN_DECLARE_CONST_FAST_Packet2l(ZERO, 0); // p2l_ZERO
91
+ static _EIGEN_DECLARE_CONST_FAST_Packet2l(ONE, 1); // p2l_ONE
92
+
93
+ static Packet2d p2d_ONE = { 1.0, 1.0 }; // spelled as a literal; note the FAST macro above reinterprets an integer splat, so it would not yield 1.0
94
+ static Packet2d p2d_ZERO_ = { numext::bit_cast<double>(0x8000000000000000ull), // each lane: only the top bit set, i.e. negative zero for an IEEE-754 double
95
+ numext::bit_cast<double>(0x8000000000000000ull) }; // the call parentheses were unbalanced before; the argument is now properly wrapped
96
+
97
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12) // Packet4f helpers that need the native float vector type
98
+ #define _EIGEN_DECLARE_CONST_FAST_Packet4f(NAME,X) \
99
+ Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(vec_splat_s32(X)) // declares p4f_<NAME>; X is an integer splat whose bits are reinterpreted as floats
100
+
101
+ #define _EIGEN_DECLARE_CONST_Packet4f(NAME,X) \
102
+ Packet4f p4f_##NAME = pset1<Packet4f>(X) // declares p4f_<NAME> via pset1
103
+
104
+ #define _EIGEN_DECLARE_CONST_Packet4f_FROM_INT(NAME,X) \
105
+ const Packet4f p4f_##NAME = reinterpret_cast<Packet4f>(pset1<Packet4i>(X)) // declares a const p4f_<NAME> from the bits of an int splat
106
+
107
+ static _EIGEN_DECLARE_CONST_FAST_Packet4f(ZERO, 0); // p4f_ZERO = { 0.0, 0.0, 0.0, 0.0 }
108
+ static _EIGEN_DECLARE_CONST_FAST_Packet4i(MINUS1,-1); // p4i_MINUS1 = { -1, -1, -1, -1 }
109
+ static Packet4f p4f_MZERO = { 0x80000000, 0x80000000, 0x80000000, 0x80000000}; // NOTE(review): the name suggests -0.0f, but these are integer literals; if they are converted by value the lanes hold 2147483648.0f rather than the 0x80000000 bit pattern -- confirm intent
110
+ #endif
111
+
112
+ static Packet4i p4i_COUNTDOWN = { 0, 1, 2, 3 }; // lane values 0..3 (ascending, despite the name)
113
+ static Packet4f p4f_COUNTDOWN = { 0.0, 1.0, 2.0, 3.0 }; // declared outside the __ARCH__ >= 12 guard, so it also applies to the struct form of Packet4f
114
+ static Packet2d p2d_COUNTDOWN = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet16uc>(p2d_ZERO), reinterpret_cast<Packet16uc>(p2d_ONE), 8)); // combines p2d_ZERO and p2d_ONE with an 8-byte shift; presumably yields { 0.0, 1.0 } -- confirm
115
+
116
+ static Packet16uc p16uc_PSET64_HI = { 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 }; // byte indices 0..7 repeated twice
117
+ static Packet16uc p16uc_DUPLICATE32_HI = { 0,1,2,3, 0,1,2,3, 4,5,6,7, 4,5,6,7 }; // byte indices of words 0 and 1, each repeated
118
+
119
+ // Mask alignment
120
+ #define _EIGEN_MASK_ALIGNMENT 0xfffffffffffffff0 // every bit set except the low four
121
+
122
+ #define _EIGEN_ALIGNED_PTR(x) ((std::ptrdiff_t)(x) & _EIGEN_MASK_ALIGNMENT) // x converted to an integer with its low four bits cleared (rounded down to a multiple of 16); the result is an integer, not a pointer
123
+
124
+ // Handle endianness properly while loading constants
125
+ // Define global static constants:
126
+
127
+ static Packet16uc p16uc_FORWARD = { 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15 }; // byte indices in natural order
128
+ static Packet16uc p16uc_REVERSE32 = { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }; // the four 4-byte groups in reverse order
129
+ static Packet16uc p16uc_REVERSE64 = { 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 }; // the two 8-byte halves swapped
130
+
131
+ static Packet16uc p16uc_PSET32_WODD = vec_sld((Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 0), (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 2), 8);//{ 0,1,2,3, 0,1,2,3, 8,9,10,11, 8,9,10,11 };
132
+ static Packet16uc p16uc_PSET32_WEVEN = vec_sld(p16uc_DUPLICATE32_HI, (Packet16uc) vec_splat((Packet4ui)p16uc_FORWARD, 3), 8);//{ 4,5,6,7, 4,5,6,7, 12,13,14,15, 12,13,14,15 }; reads p16uc_DUPLICATE32_HI, so that constant must be declared earlier
133
+ /*static Packet16uc p16uc_HALF64_0_16 = vec_sld((Packet16uc)p4i_ZERO, vec_splat((Packet16uc) vec_abs(p4i_MINUS16), 3), 8); //{ 0,0,0,0, 0,0,0,0, 16,16,16,16, 16,16,16,16};
134
+
135
+ static Packet16uc p16uc_PSET64_HI = (Packet16uc) vec_mergeh((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 0,1,2,3, 4,5,6,7, 0,1,2,3, 4,5,6,7 };*/
136
+ static Packet16uc p16uc_PSET64_LO = (Packet16uc) vec_mergel((Packet4ui)p16uc_PSET32_WODD, (Packet4ui)p16uc_PSET32_WEVEN); //{ 8,9,10,11, 12,13,14,15, 8,9,10,11, 12,13,14,15 };
137
+ /*static Packet16uc p16uc_TRANSPOSE64_HI = vec_add(p16uc_PSET64_HI, p16uc_HALF64_0_16); //{ 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23};
138
+ static Packet16uc p16uc_TRANSPOSE64_LO = vec_add(p16uc_PSET64_LO, p16uc_HALF64_0_16); //{ 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31};*/
139
+ static Packet16uc p16uc_TRANSPOSE64_HI = { 0,1,2,3, 4,5,6,7, 16,17,18,19, 20,21,22,23}; // literal replacement for the computed form commented out above; indices >= 16 presumably select from a second source vector -- confirm
140
+ static Packet16uc p16uc_TRANSPOSE64_LO = { 8,9,10,11, 12,13,14,15, 24,25,26,27, 28,29,30,31}; // counterpart of TRANSPOSE64_HI for the other 8-byte halves
141
+
142
+ static Packet16uc p16uc_COMPLEX32_REV = vec_sld(p16uc_REVERSE32, p16uc_REVERSE32, 8); //{ 4,5,6,7, 0,1,2,3, 12,13,14,15, 8,9,10,11 };
143
+
144
+ static Packet16uc p16uc_COMPLEX32_REV2 = vec_sld(p16uc_FORWARD, p16uc_FORWARD, 8); //{ 8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7 };
145
+
146
+
147
+ #if EIGEN_HAS_BUILTIN(__builtin_prefetch) || EIGEN_COMP_GNUC // prefer the compiler builtin when it is available
148
+ #define EIGEN_ZVECTOR_PREFETCH(ADDR) __builtin_prefetch(ADDR); // expansion already ends in ';', so a ';' at the call site adds an empty statement
149
+ #else
150
+ #define EIGEN_ZVECTOR_PREFETCH(ADDR) asm( " pfd [%[addr]]\n" :: [addr] "r" (ADDR) : "cc" ); // inline-asm fallback: ADDR is passed in a register and "cc" is declared clobbered
151
+ #endif
152
+
153
+ template<> struct packet_traits<int> : default_packet_traits // traits for scalar int; flags not listed here come from default_packet_traits
154
+ {
155
+ typedef Packet4i type; // packet type used for int
156
+ typedef Packet4i half; // same type as the full packet (HasHalfPacket is 0)
157
+ enum {
158
+ Vectorizable = 1,
159
+ AlignedOnScalar = 1,
160
+ size = 4, // ints per packet
161
+ HasHalfPacket = 0,
162
+
163
+ HasAdd = 1, // capability flags: 1 marks an operation as provided for this packet type
164
+ HasSub = 1,
165
+ HasMul = 1,
166
+ HasDiv = 1,
167
+ HasBlend = 1
168
+ };
169
+ };
170
+
171
+ template <>
172
+ struct packet_traits<float> : default_packet_traits { // traits for scalar float; flags not listed here come from default_packet_traits
173
+ typedef Packet4f type; // packet type used for float
174
+ typedef Packet4f half; // same type as the full packet (HasHalfPacket is 0)
175
+ enum {
176
+ Vectorizable = 1,
177
+ AlignedOnScalar = 1,
178
+ size = 4, // floats per packet
179
+ HasHalfPacket = 0,
180
+
181
+ HasAdd = 1, // capability flags: 1 marks an operation as provided, 0 as not provided
182
+ HasSub = 1,
183
+ HasMul = 1,
184
+ HasDiv = 1,
185
+ HasMin = 1,
186
+ HasMax = 1,
187
+ HasAbs = 1,
188
+ HasSin = 0, // sin, cos and log are explicitly switched off for float
189
+ HasCos = 0,
190
+ HasLog = 0,
191
+ HasExp = 1,
192
+ HasSqrt = 1,
193
+ HasRsqrt = 1,
194
+ HasTanh = 1, // tanh and erf are enabled here but not listed in packet_traits<double>
195
+ HasErf = 1,
196
+ HasRound = 1,
197
+ HasFloor = 1,
198
+ HasCeil = 1,
199
+ HasNegate = 1,
200
+ HasBlend = 1
201
+ };
202
+ };
203
+
204
+ template<> struct packet_traits<double> : default_packet_traits // traits for scalar double; flags not listed here come from default_packet_traits
205
+ {
206
+ typedef Packet2d type; // packet type used for double
207
+ typedef Packet2d half; // same type as the full packet
208
+ enum {
209
+ Vectorizable = 1,
210
+ AlignedOnScalar = 1,
211
+ size=2, // doubles per packet
212
+ HasHalfPacket = 1, // NOTE(review): set to 1 although half is Packet2d itself; the int and float traits use 0 -- confirm this is intended
213
+
214
+ HasAdd = 1, // capability flags: 1 marks an operation as provided, 0 as not provided
215
+ HasSub = 1,
216
+ HasMul = 1,
217
+ HasDiv = 1,
218
+ HasMin = 1,
219
+ HasMax = 1,
220
+ HasAbs = 1,
221
+ HasSin = 0, // sin, cos and log are explicitly switched off for double
222
+ HasCos = 0,
223
+ HasLog = 0,
224
+ HasExp = 1,
225
+ HasSqrt = 1,
226
+ HasRsqrt = 1,
227
+ HasRound = 1,
228
+ HasFloor = 1,
229
+ HasCeil = 1,
230
+ HasNegate = 1,
231
+ HasBlend = 1
232
+ };
233
+ };
234
+
235
+ template<> struct unpacket_traits<Packet4i> { typedef int type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4i half; };
236
+ template<> struct unpacket_traits<Packet4f> { typedef float type; enum {size=4, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet4f half; };
237
+ template<> struct unpacket_traits<Packet2d> { typedef double type; enum {size=2, alignment=Aligned16, vectorizable=true, masked_load_available=false, masked_store_available=false}; typedef Packet2d half; };
238
+
239
+ /* Forward declaration */
240
+ EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock<Packet4f,4>& kernel);
241
+
242
+ inline std::ostream & operator <<(std::ostream & s, const Packet4i & v)
243
+ {
244
+ Packet vt;
245
+ vt.v4i = v;
246
+ s << vt.i[0] << ", " << vt.i[1] << ", " << vt.i[2] << ", " << vt.i[3];
247
+ return s;
248
+ }
249
+
250
+ inline std::ostream & operator <<(std::ostream & s, const Packet4ui & v)
251
+ {
252
+ Packet vt;
253
+ vt.v4ui = v;
254
+ s << vt.ui[0] << ", " << vt.ui[1] << ", " << vt.ui[2] << ", " << vt.ui[3];
255
+ return s;
256
+ }
257
+
258
+ inline std::ostream & operator <<(std::ostream & s, const Packet2l & v)
259
+ {
260
+ Packet vt;
261
+ vt.v2l = v;
262
+ s << vt.l[0] << ", " << vt.l[1];
263
+ return s;
264
+ }
265
+
266
+ inline std::ostream & operator <<(std::ostream & s, const Packet2ul & v)
267
+ {
268
+ Packet vt;
269
+ vt.v2ul = v;
270
+ s << vt.ul[0] << ", " << vt.ul[1] ;
271
+ return s;
272
+ }
273
+
274
+ inline std::ostream & operator <<(std::ostream & s, const Packet2d & v)
275
+ {
276
+ Packet vt;
277
+ vt.v2d = v;
278
+ s << vt.d[0] << ", " << vt.d[1];
279
+ return s;
280
+ }
281
+
282
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ >= 12)
283
+ inline std::ostream & operator <<(std::ostream & s, const Packet4f & v)
284
+ {
285
+ Packet vt;
286
+ vt.v4f = v;
287
+ s << vt.f[0] << ", " << vt.f[1] << ", " << vt.f[2] << ", " << vt.f[3];
288
+ return s;
289
+ }
290
+ #endif
291
+
292
+ template<> EIGEN_STRONG_INLINE Packet4i pload<Packet4i>(const int* from)
293
+ {
294
+ // FIXME: No intrinsic yet
295
+ EIGEN_DEBUG_ALIGNED_LOAD
296
+ Packet *vfrom;
297
+ vfrom = (Packet *) from;
298
+ return vfrom->v4i;
299
+ }
300
+
301
+ template<> EIGEN_STRONG_INLINE Packet2d pload<Packet2d>(const double* from)
302
+ {
303
+ // FIXME: No intrinsic yet
304
+ EIGEN_DEBUG_ALIGNED_LOAD
305
+ Packet *vfrom;
306
+ vfrom = (Packet *) from;
307
+ return vfrom->v2d;
308
+ }
309
+
310
+ template<> EIGEN_STRONG_INLINE void pstore<int>(int* to, const Packet4i& from)
311
+ {
312
+ // FIXME: No intrinsic yet
313
+ EIGEN_DEBUG_ALIGNED_STORE
314
+ Packet *vto;
315
+ vto = (Packet *) to;
316
+ vto->v4i = from;
317
+ }
318
+
319
+ template<> EIGEN_STRONG_INLINE void pstore<double>(double* to, const Packet2d& from)
320
+ {
321
+ // FIXME: No intrinsic yet
322
+ EIGEN_DEBUG_ALIGNED_STORE
323
+ Packet *vto;
324
+ vto = (Packet *) to;
325
+ vto->v2d = from;
326
+ }
327
+
328
+ template<> EIGEN_STRONG_INLINE Packet4i pset1<Packet4i>(const int& from)
329
+ {
330
+ return vec_splats(from);
331
+ }
332
+ template<> EIGEN_STRONG_INLINE Packet2d pset1<Packet2d>(const double& from) {
333
+ return vec_splats(from);
334
+ }
335
+
336
+ template<> EIGEN_STRONG_INLINE void
337
+ pbroadcast4<Packet4i>(const int *a,
338
+ Packet4i& a0, Packet4i& a1, Packet4i& a2, Packet4i& a3)
339
+ {
340
+ a3 = pload<Packet4i>(a);
341
+ a0 = vec_splat(a3, 0);
342
+ a1 = vec_splat(a3, 1);
343
+ a2 = vec_splat(a3, 2);
344
+ a3 = vec_splat(a3, 3);
345
+ }
346
+
347
+ template<> EIGEN_STRONG_INLINE void
348
+ pbroadcast4<Packet2d>(const double *a,
349
+ Packet2d& a0, Packet2d& a1, Packet2d& a2, Packet2d& a3)
350
+ {
351
+ a1 = pload<Packet2d>(a);
352
+ a0 = vec_splat(a1, 0);
353
+ a1 = vec_splat(a1, 1);
354
+ a3 = pload<Packet2d>(a+2);
355
+ a2 = vec_splat(a3, 0);
356
+ a3 = vec_splat(a3, 1);
357
+ }
358
+
359
+ template<> EIGEN_DEVICE_FUNC inline Packet4i pgather<int, Packet4i>(const int* from, Index stride)
360
+ {
361
+ int EIGEN_ALIGN16 ai[4];
362
+ ai[0] = from[0*stride];
363
+ ai[1] = from[1*stride];
364
+ ai[2] = from[2*stride];
365
+ ai[3] = from[3*stride];
366
+ return pload<Packet4i>(ai);
367
+ }
368
+
369
+ template<> EIGEN_DEVICE_FUNC inline Packet2d pgather<double, Packet2d>(const double* from, Index stride)
370
+ {
371
+ double EIGEN_ALIGN16 af[2];
372
+ af[0] = from[0*stride];
373
+ af[1] = from[1*stride];
374
+ return pload<Packet2d>(af);
375
+ }
376
+
377
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<int, Packet4i>(int* to, const Packet4i& from, Index stride)
378
+ {
379
+ int EIGEN_ALIGN16 ai[4];
380
+ pstore<int>((int *)ai, from);
381
+ to[0*stride] = ai[0];
382
+ to[1*stride] = ai[1];
383
+ to[2*stride] = ai[2];
384
+ to[3*stride] = ai[3];
385
+ }
386
+
387
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<double, Packet2d>(double* to, const Packet2d& from, Index stride)
388
+ {
389
+ double EIGEN_ALIGN16 af[2];
390
+ pstore<double>(af, from);
391
+ to[0*stride] = af[0];
392
+ to[1*stride] = af[1];
393
+ }
394
+
395
+ template<> EIGEN_STRONG_INLINE Packet4i padd<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a + b); }
396
+ template<> EIGEN_STRONG_INLINE Packet2d padd<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a + b); }
397
+
398
+ template<> EIGEN_STRONG_INLINE Packet4i psub<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a - b); }
399
+ template<> EIGEN_STRONG_INLINE Packet2d psub<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a - b); }
400
+
401
+ template<> EIGEN_STRONG_INLINE Packet4i pmul<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a * b); }
402
+ template<> EIGEN_STRONG_INLINE Packet2d pmul<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a * b); }
403
+
404
+ template<> EIGEN_STRONG_INLINE Packet4i pdiv<Packet4i>(const Packet4i& a, const Packet4i& b) { return (a / b); }
405
+ template<> EIGEN_STRONG_INLINE Packet2d pdiv<Packet2d>(const Packet2d& a, const Packet2d& b) { return (a / b); }
406
+
407
+ template<> EIGEN_STRONG_INLINE Packet4i pnegate(const Packet4i& a) { return (-a); }
408
+ template<> EIGEN_STRONG_INLINE Packet2d pnegate(const Packet2d& a) { return (-a); }
409
+
410
+ template<> EIGEN_STRONG_INLINE Packet4i pconj(const Packet4i& a) { return a; }
411
+ template<> EIGEN_STRONG_INLINE Packet2d pconj(const Packet2d& a) { return a; }
412
+
413
+ template<> EIGEN_STRONG_INLINE Packet4i pmadd(const Packet4i& a, const Packet4i& b, const Packet4i& c) { return padd<Packet4i>(pmul<Packet4i>(a, b), c); }
414
+ template<> EIGEN_STRONG_INLINE Packet2d pmadd(const Packet2d& a, const Packet2d& b, const Packet2d& c) { return vec_madd(a, b, c); }
415
+
416
+ template<> EIGEN_STRONG_INLINE Packet4i plset<Packet4i>(const int& a) { return padd<Packet4i>(pset1<Packet4i>(a), p4i_COUNTDOWN); }
417
+ template<> EIGEN_STRONG_INLINE Packet2d plset<Packet2d>(const double& a) { return padd<Packet2d>(pset1<Packet2d>(a), p2d_COUNTDOWN); }
418
+
419
+ template<> EIGEN_STRONG_INLINE Packet4i pmin<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_min(a, b); }
420
+ template<> EIGEN_STRONG_INLINE Packet2d pmin<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_min(a, b); }
421
+
422
+ template<> EIGEN_STRONG_INLINE Packet4i pmax<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_max(a, b); }
423
+ template<> EIGEN_STRONG_INLINE Packet2d pmax<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_max(a, b); }
424
+
425
+ template<> EIGEN_STRONG_INLINE Packet4i pand<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_and(a, b); }
426
+ template<> EIGEN_STRONG_INLINE Packet2d pand<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, b); }
427
+
428
+ template<> EIGEN_STRONG_INLINE Packet4i por<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_or(a, b); }
429
+ template<> EIGEN_STRONG_INLINE Packet2d por<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_or(a, b); }
430
+
431
+ template<> EIGEN_STRONG_INLINE Packet4i pxor<Packet4i>(const Packet4i& a, const Packet4i& b) { return vec_xor(a, b); }
432
+ template<> EIGEN_STRONG_INLINE Packet2d pxor<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_xor(a, b); }
433
+
434
+ template<> EIGEN_STRONG_INLINE Packet4i pandnot<Packet4i>(const Packet4i& a, const Packet4i& b) { return pand<Packet4i>(a, vec_nor(b, b)); }
435
+ template<> EIGEN_STRONG_INLINE Packet2d pandnot<Packet2d>(const Packet2d& a, const Packet2d& b) { return vec_and(a, vec_nor(b, b)); }
436
+
437
+ template<> EIGEN_STRONG_INLINE Packet2d pround<Packet2d>(const Packet2d& a) { return vec_round(a); }
438
+ template<> EIGEN_STRONG_INLINE Packet2d pceil<Packet2d>(const Packet2d& a) { return vec_ceil(a); }
439
+ template<> EIGEN_STRONG_INLINE Packet2d pfloor<Packet2d>(const Packet2d& a) { return vec_floor(a); }
440
+
441
+ template<> EIGEN_STRONG_INLINE Packet4i ploadu<Packet4i>(const int* from) { return pload<Packet4i>(from); }
442
+ template<> EIGEN_STRONG_INLINE Packet2d ploadu<Packet2d>(const double* from) { return pload<Packet2d>(from); }
443
+
444
+
445
+ template<> EIGEN_STRONG_INLINE Packet4i ploaddup<Packet4i>(const int* from)
446
+ {
447
+ Packet4i p = pload<Packet4i>(from);
448
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
449
+ }
450
+
451
+ template<> EIGEN_STRONG_INLINE Packet2d ploaddup<Packet2d>(const double* from)
452
+ {
453
+ Packet2d p = pload<Packet2d>(from);
454
+ return vec_perm(p, p, p16uc_PSET64_HI);
455
+ }
456
+
457
+ template<> EIGEN_STRONG_INLINE void pstoreu<int>(int* to, const Packet4i& from) { pstore<int>(to, from); }
458
+ template<> EIGEN_STRONG_INLINE void pstoreu<double>(double* to, const Packet2d& from) { pstore<double>(to, from); }
459
+
460
+ template<> EIGEN_STRONG_INLINE void prefetch<int>(const int* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
461
+ template<> EIGEN_STRONG_INLINE void prefetch<double>(const double* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
462
+
463
+ template<> EIGEN_STRONG_INLINE int pfirst<Packet4i>(const Packet4i& a) { int EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
464
+ template<> EIGEN_STRONG_INLINE double pfirst<Packet2d>(const Packet2d& a) { double EIGEN_ALIGN16 x[2]; pstore(x, a); return x[0]; }
465
+
466
+ template<> EIGEN_STRONG_INLINE Packet4i preverse(const Packet4i& a)
467
+ {
468
+ return reinterpret_cast<Packet4i>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
469
+ }
470
+
471
+ template<> EIGEN_STRONG_INLINE Packet2d preverse(const Packet2d& a)
472
+ {
473
+ return reinterpret_cast<Packet2d>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE64));
474
+ }
475
+
476
+ template<> EIGEN_STRONG_INLINE Packet4i pabs<Packet4i>(const Packet4i& a) { return vec_abs(a); }
477
+ template<> EIGEN_STRONG_INLINE Packet2d pabs<Packet2d>(const Packet2d& a) { return vec_abs(a); }
478
+
479
+ template<> EIGEN_STRONG_INLINE int predux<Packet4i>(const Packet4i& a)
480
+ {
481
+ Packet4i b, sum;
482
+ b = vec_sld(a, a, 8);
483
+ sum = padd<Packet4i>(a, b);
484
+ b = vec_sld(sum, sum, 4);
485
+ sum = padd<Packet4i>(sum, b);
486
+ return pfirst(sum);
487
+ }
488
+
489
+ template<> EIGEN_STRONG_INLINE double predux<Packet2d>(const Packet2d& a)
490
+ {
491
+ Packet2d b, sum;
492
+ b = reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8));
493
+ sum = padd<Packet2d>(a, b);
494
+ return pfirst(sum);
495
+ }
496
+
497
+ // Other reduction functions:
498
+ // mul
499
+ template<> EIGEN_STRONG_INLINE int predux_mul<Packet4i>(const Packet4i& a)
500
+ {
501
+ EIGEN_ALIGN16 int aux[4];
502
+ pstore(aux, a);
503
+ return aux[0] * aux[1] * aux[2] * aux[3];
504
+ }
505
+
506
+ template<> EIGEN_STRONG_INLINE double predux_mul<Packet2d>(const Packet2d& a)
507
+ {
508
+ return pfirst(pmul(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
509
+ }
510
+
511
+ // min
512
+ template<> EIGEN_STRONG_INLINE int predux_min<Packet4i>(const Packet4i& a)
513
+ {
514
+ Packet4i b, res;
515
+ b = pmin<Packet4i>(a, vec_sld(a, a, 8));
516
+ res = pmin<Packet4i>(b, vec_sld(b, b, 4));
517
+ return pfirst(res);
518
+ }
519
+
520
+ template<> EIGEN_STRONG_INLINE double predux_min<Packet2d>(const Packet2d& a)
521
+ {
522
+ return pfirst(pmin<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
523
+ }
524
+
525
+ // max
526
+ template<> EIGEN_STRONG_INLINE int predux_max<Packet4i>(const Packet4i& a)
527
+ {
528
+ Packet4i b, res;
529
+ b = pmax<Packet4i>(a, vec_sld(a, a, 8));
530
+ res = pmax<Packet4i>(b, vec_sld(b, b, 4));
531
+ return pfirst(res);
532
+ }
533
+
534
+ // max
535
+ template<> EIGEN_STRONG_INLINE double predux_max<Packet2d>(const Packet2d& a)
536
+ {
537
+ return pfirst(pmax<Packet2d>(a, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(a), reinterpret_cast<Packet4i>(a), 8))));
538
+ }
539
+
540
+ EIGEN_DEVICE_FUNC inline void
541
+ ptranspose(PacketBlock<Packet4i,4>& kernel) {
542
+ Packet4i t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
543
+ Packet4i t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
544
+ Packet4i t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
545
+ Packet4i t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
546
+ kernel.packet[0] = vec_mergeh(t0, t2);
547
+ kernel.packet[1] = vec_mergel(t0, t2);
548
+ kernel.packet[2] = vec_mergeh(t1, t3);
549
+ kernel.packet[3] = vec_mergel(t1, t3);
550
+ }
551
+
552
+ EIGEN_DEVICE_FUNC inline void
553
+ ptranspose(PacketBlock<Packet2d,2>& kernel) {
554
+ Packet2d t0 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_HI);
555
+ Packet2d t1 = vec_perm(kernel.packet[0], kernel.packet[1], p16uc_TRANSPOSE64_LO);
556
+ kernel.packet[0] = t0;
557
+ kernel.packet[1] = t1;
558
+ }
559
+
560
+ template<> EIGEN_STRONG_INLINE Packet4i pblend(const Selector<4>& ifPacket, const Packet4i& thenPacket, const Packet4i& elsePacket) {
561
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
562
+ Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
563
+ return vec_sel(elsePacket, thenPacket, mask);
564
+ }
565
+
566
+
567
+ template<> EIGEN_STRONG_INLINE Packet2d pblend(const Selector<2>& ifPacket, const Packet2d& thenPacket, const Packet2d& elsePacket) {
568
+ Packet2ul select = { ifPacket.select[0], ifPacket.select[1] };
569
+ Packet2ul mask = vec_cmpeq(select, reinterpret_cast<Packet2ul>(p2l_ONE));
570
+ return vec_sel(elsePacket, thenPacket, mask);
571
+ }
572
+
573
+ /* z13 has no vector float support so we emulate that with double
574
+ z14 has proper vector float support.
575
+ */
576
+ #if !defined(__ARCH__) || (defined(__ARCH__) && __ARCH__ < 12)
577
+ /* Helper function to simulate a vec_splat_packet4f
578
+ */
579
+ template<int element> EIGEN_STRONG_INLINE Packet4f vec_splat_packet4f(const Packet4f& from)
580
+ {
581
+ Packet4f splat;
582
+ switch (element) {
583
+ case 0:
584
+ splat.v4f[0] = vec_splat(from.v4f[0], 0);
585
+ splat.v4f[1] = splat.v4f[0];
586
+ break;
587
+ case 1:
588
+ splat.v4f[0] = vec_splat(from.v4f[0], 1);
589
+ splat.v4f[1] = splat.v4f[0];
590
+ break;
591
+ case 2:
592
+ splat.v4f[0] = vec_splat(from.v4f[1], 0);
593
+ splat.v4f[1] = splat.v4f[0];
594
+ break;
595
+ case 3:
596
+ splat.v4f[0] = vec_splat(from.v4f[1], 1);
597
+ splat.v4f[1] = splat.v4f[0];
598
+ break;
599
+ }
600
+ return splat;
601
+ }
602
+
603
+ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
604
+ {
605
+ // FIXME: No intrinsic yet
606
+ EIGEN_DEBUG_ALIGNED_LOAD
607
+ Packet4f vfrom;
608
+ vfrom.v4f[0] = vec_ld2f(&from[0]);
609
+ vfrom.v4f[1] = vec_ld2f(&from[2]);
610
+ return vfrom;
611
+ }
612
+
613
+ template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
614
+ {
615
+ // FIXME: No intrinsic yet
616
+ EIGEN_DEBUG_ALIGNED_STORE
617
+ vec_st2f(from.v4f[0], &to[0]);
618
+ vec_st2f(from.v4f[1], &to[2]);
619
+ }
620
+
621
+ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
622
+ {
623
+ Packet4f to;
624
+ to.v4f[0] = pset1<Packet2d>(static_cast<const double&>(from));
625
+ to.v4f[1] = to.v4f[0];
626
+ return to;
627
+ }
628
+
629
+ template<> EIGEN_STRONG_INLINE void
630
+ pbroadcast4<Packet4f>(const float *a,
631
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
632
+ {
633
+ a3 = pload<Packet4f>(a);
634
+ a0 = vec_splat_packet4f<0>(a3);
635
+ a1 = vec_splat_packet4f<1>(a3);
636
+ a2 = vec_splat_packet4f<2>(a3);
637
+ a3 = vec_splat_packet4f<3>(a3);
638
+ }
639
+
640
+ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
641
+ {
642
+ float EIGEN_ALIGN16 ai[4];
643
+ ai[0] = from[0*stride];
644
+ ai[1] = from[1*stride];
645
+ ai[2] = from[2*stride];
646
+ ai[3] = from[3*stride];
647
+ return pload<Packet4f>(ai);
648
+ }
649
+
650
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
651
+ {
652
+ float EIGEN_ALIGN16 ai[4];
653
+ pstore<float>((float *)ai, from);
654
+ to[0*stride] = ai[0];
655
+ to[1*stride] = ai[1];
656
+ to[2*stride] = ai[2];
657
+ to[3*stride] = ai[3];
658
+ }
659
+
660
+ template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b)
661
+ {
662
+ Packet4f c;
663
+ c.v4f[0] = a.v4f[0] + b.v4f[0];
664
+ c.v4f[1] = a.v4f[1] + b.v4f[1];
665
+ return c;
666
+ }
667
+
668
+ template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b)
669
+ {
670
+ Packet4f c;
671
+ c.v4f[0] = a.v4f[0] - b.v4f[0];
672
+ c.v4f[1] = a.v4f[1] - b.v4f[1];
673
+ return c;
674
+ }
675
+
676
+ template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b)
677
+ {
678
+ Packet4f c;
679
+ c.v4f[0] = a.v4f[0] * b.v4f[0];
680
+ c.v4f[1] = a.v4f[1] * b.v4f[1];
681
+ return c;
682
+ }
683
+
684
+ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b)
685
+ {
686
+ Packet4f c;
687
+ c.v4f[0] = a.v4f[0] / b.v4f[0];
688
+ c.v4f[1] = a.v4f[1] / b.v4f[1];
689
+ return c;
690
+ }
691
+
692
+ template<> EIGEN_STRONG_INLINE Packet4f pnegate(const Packet4f& a)
693
+ {
694
+ Packet4f c;
695
+ c.v4f[0] = -a.v4f[0];
696
+ c.v4f[1] = -a.v4f[1];
697
+ return c;
698
+ }
699
+
700
+ template<> EIGEN_STRONG_INLINE Packet4f pmadd(const Packet4f& a, const Packet4f& b, const Packet4f& c)
701
+ {
702
+ Packet4f res;
703
+ res.v4f[0] = vec_madd(a.v4f[0], b.v4f[0], c.v4f[0]);
704
+ res.v4f[1] = vec_madd(a.v4f[1], b.v4f[1], c.v4f[1]);
705
+ return res;
706
+ }
707
+
708
+ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f>(const Packet4f& a, const Packet4f& b)
709
+ {
710
+ Packet4f res;
711
+ res.v4f[0] = pmin(a.v4f[0], b.v4f[0]);
712
+ res.v4f[1] = pmin(a.v4f[1], b.v4f[1]);
713
+ return res;
714
+ }
715
+
716
+ template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f>(const Packet4f& a, const Packet4f& b)
717
+ {
718
+ Packet4f res;
719
+ res.v4f[0] = pmax(a.v4f[0], b.v4f[0]);
720
+ res.v4f[1] = pmax(a.v4f[1], b.v4f[1]);
721
+ return res;
722
+ }
723
+
724
+ template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f>(const Packet4f& a, const Packet4f& b)
725
+ {
726
+ Packet4f res;
727
+ res.v4f[0] = pand(a.v4f[0], b.v4f[0]);
728
+ res.v4f[1] = pand(a.v4f[1], b.v4f[1]);
729
+ return res;
730
+ }
731
+
732
+ template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f>(const Packet4f& a, const Packet4f& b)
733
+ {
734
+ Packet4f res;
735
+ res.v4f[0] = por(a.v4f[0], b.v4f[0]);
736
+ res.v4f[1] = por(a.v4f[1], b.v4f[1]);
737
+ return res;
738
+ }
739
+
740
+ template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f>(const Packet4f& a, const Packet4f& b)
741
+ {
742
+ Packet4f res;
743
+ res.v4f[0] = pxor(a.v4f[0], b.v4f[0]);
744
+ res.v4f[1] = pxor(a.v4f[1], b.v4f[1]);
745
+ return res;
746
+ }
747
+
748
+ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b)
749
+ {
750
+ Packet4f res;
751
+ res.v4f[0] = pandnot(a.v4f[0], b.v4f[0]);
752
+ res.v4f[1] = pandnot(a.v4f[1], b.v4f[1]);
753
+ return res;
754
+ }
755
+
756
+ template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f>(const Packet4f& a)
757
+ {
758
+ Packet4f res;
759
+ res.v4f[0] = vec_round(a.v4f[0]);
760
+ res.v4f[1] = vec_round(a.v4f[1]);
761
+ return res;
762
+ }
763
+
764
+ template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f>(const Packet4f& a)
765
+ {
766
+ Packet4f res;
767
+ res.v4f[0] = vec_ceil(a.v4f[0]);
768
+ res.v4f[1] = vec_ceil(a.v4f[1]);
769
+ return res;
770
+ }
771
+
772
+ template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f>(const Packet4f& a)
773
+ {
774
+ Packet4f res;
775
+ res.v4f[0] = vec_floor(a.v4f[0]);
776
+ res.v4f[1] = vec_floor(a.v4f[1]);
777
+ return res;
778
+ }
779
+
780
+ template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
781
+ {
782
+ Packet4f p = pload<Packet4f>(from);
783
+ p.v4f[1] = vec_splat(p.v4f[0], 1);
784
+ p.v4f[0] = vec_splat(p.v4f[0], 0);
785
+ return p;
786
+ }
787
+
788
+ template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[2]; vec_st2f(a.v4f[0], &x[0]); return x[0]; }
789
+
790
+ template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
791
+ {
792
+ Packet4f rev;
793
+ rev.v4f[0] = preverse<Packet2d>(a.v4f[1]);
794
+ rev.v4f[1] = preverse<Packet2d>(a.v4f[0]);
795
+ return rev;
796
+ }
797
+
798
+ template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f>(const Packet4f& a)
799
+ {
800
+ Packet4f res;
801
+ res.v4f[0] = pabs(a.v4f[0]);
802
+ res.v4f[1] = pabs(a.v4f[1]);
803
+ return res;
804
+ }
805
+
806
+ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
807
+ {
808
+ Packet2d sum;
809
+ sum = padd<Packet2d>(a.v4f[0], a.v4f[1]);
810
+ double first = predux<Packet2d>(sum);
811
+ return static_cast<float>(first);
812
+ }
813
+
814
+ template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
815
+ {
816
+ // Return predux_mul<Packet2d> of the subvectors product
817
+ return static_cast<float>(pfirst(predux_mul(pmul(a.v4f[0], a.v4f[1]))));
818
+ }
819
+
820
+ template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
821
+ {
822
+ Packet2d b, res;
823
+ b = pmin<Packet2d>(a.v4f[0], a.v4f[1]);
824
+ res = pmin<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
825
+ return static_cast<float>(pfirst(res));
826
+ }
827
+
828
+ template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
829
+ {
830
+ Packet2d b, res;
831
+ b = pmax<Packet2d>(a.v4f[0], a.v4f[1]);
832
+ res = pmax<Packet2d>(b, reinterpret_cast<Packet2d>(vec_sld(reinterpret_cast<Packet4i>(b), reinterpret_cast<Packet4i>(b), 8)));
833
+ return static_cast<float>(pfirst(res));
834
+ }
835
+
836
+ /* Split the Packet4f PacketBlock into 4 Packet2d PacketBlocks and transpose each one
837
+ */
838
+ EIGEN_DEVICE_FUNC inline void
839
+ ptranspose(PacketBlock<Packet4f,4>& kernel) {
840
+ PacketBlock<Packet2d,2> t0,t1,t2,t3;
841
+ // copy top-left 2x2 Packet2d block
842
+ t0.packet[0] = kernel.packet[0].v4f[0];
843
+ t0.packet[1] = kernel.packet[1].v4f[0];
844
+
845
+ // copy top-right 2x2 Packet2d block
846
+ t1.packet[0] = kernel.packet[0].v4f[1];
847
+ t1.packet[1] = kernel.packet[1].v4f[1];
848
+
849
+ // copy bottom-left 2x2 Packet2d block
850
+ t2.packet[0] = kernel.packet[2].v4f[0];
851
+ t2.packet[1] = kernel.packet[3].v4f[0];
852
+
853
+ // copy bottom-right 2x2 Packet2d block
854
+ t3.packet[0] = kernel.packet[2].v4f[1];
855
+ t3.packet[1] = kernel.packet[3].v4f[1];
856
+
857
+ // Transpose all 2x2 blocks
858
+ ptranspose(t0);
859
+ ptranspose(t1);
860
+ ptranspose(t2);
861
+ ptranspose(t3);
862
+
863
+ // Copy back transposed blocks, but exchange t1 and t2 due to transposition
864
+ kernel.packet[0].v4f[0] = t0.packet[0];
865
+ kernel.packet[0].v4f[1] = t2.packet[0];
866
+ kernel.packet[1].v4f[0] = t0.packet[1];
867
+ kernel.packet[1].v4f[1] = t2.packet[1];
868
+ kernel.packet[2].v4f[0] = t1.packet[0];
869
+ kernel.packet[2].v4f[1] = t3.packet[0];
870
+ kernel.packet[3].v4f[0] = t1.packet[1];
871
+ kernel.packet[3].v4f[1] = t3.packet[1];
872
+ }
873
+
874
+ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
875
+ Packet2ul select_hi = { ifPacket.select[0], ifPacket.select[1] };
876
+ Packet2ul select_lo = { ifPacket.select[2], ifPacket.select[3] };
877
+ Packet2ul mask_hi = vec_cmpeq(select_hi, reinterpret_cast<Packet2ul>(p2l_ONE));
878
+ Packet2ul mask_lo = vec_cmpeq(select_lo, reinterpret_cast<Packet2ul>(p2l_ONE));
879
+ Packet4f result;
880
+ result.v4f[0] = vec_sel(elsePacket.v4f[0], thenPacket.v4f[0], mask_hi);
881
+ result.v4f[1] = vec_sel(elsePacket.v4f[1], thenPacket.v4f[1], mask_lo);
882
+ return result;
883
+ }
884
+
885
+ template<> Packet4f EIGEN_STRONG_INLINE pcmp_le<Packet4f>(const Packet4f& a, const Packet4f& b)
886
+ {
887
+ Packet4f res;
888
+ res.v4f[0] = pcmp_le(a.v4f[0], b.v4f[0]);
889
+ res.v4f[1] = pcmp_le(a.v4f[1], b.v4f[1]);
890
+ return res;
891
+ }
892
+
893
+ template<> Packet4f EIGEN_STRONG_INLINE pcmp_lt<Packet4f>(const Packet4f& a, const Packet4f& b)
894
+ {
895
+ Packet4f res;
896
+ res.v4f[0] = pcmp_lt(a.v4f[0], b.v4f[0]);
897
+ res.v4f[1] = pcmp_lt(a.v4f[1], b.v4f[1]);
898
+ return res;
899
+ }
900
+
901
+ template<> Packet4f EIGEN_STRONG_INLINE pcmp_eq<Packet4f>(const Packet4f& a, const Packet4f& b)
902
+ {
903
+ Packet4f res;
904
+ res.v4f[0] = pcmp_eq(a.v4f[0], b.v4f[0]);
905
+ res.v4f[1] = pcmp_eq(a.v4f[1], b.v4f[1]);
906
+ return res;
907
+ }
908
+
909
+ #else
910
+ template<> EIGEN_STRONG_INLINE Packet4f pload<Packet4f>(const float* from)
911
+ {
912
+ // FIXME: No intrinsic yet
913
+ EIGEN_DEBUG_ALIGNED_LOAD
914
+ Packet *vfrom;
915
+ vfrom = (Packet *) from;
916
+ return vfrom->v4f;
917
+ }
918
+
919
+ template<> EIGEN_STRONG_INLINE void pstore<float>(float* to, const Packet4f& from)
920
+ {
921
+ // FIXME: No intrinsic yet
922
+ EIGEN_DEBUG_ALIGNED_STORE
923
+ Packet *vto;
924
+ vto = (Packet *) to;
925
+ vto->v4f = from;
926
+ }
927
+
928
+ template<> EIGEN_STRONG_INLINE Packet4f pset1<Packet4f>(const float& from)
929
+ {
930
+ return vec_splats(from);
931
+ }
932
+
933
+ template<> EIGEN_STRONG_INLINE void
934
+ pbroadcast4<Packet4f>(const float *a,
935
+ Packet4f& a0, Packet4f& a1, Packet4f& a2, Packet4f& a3)
936
+ {
937
+ a3 = pload<Packet4f>(a);
938
+ a0 = vec_splat(a3, 0);
939
+ a1 = vec_splat(a3, 1);
940
+ a2 = vec_splat(a3, 2);
941
+ a3 = vec_splat(a3, 3);
942
+ }
943
+
944
+ template<> EIGEN_DEVICE_FUNC inline Packet4f pgather<float, Packet4f>(const float* from, Index stride)
945
+ {
946
+ float EIGEN_ALIGN16 af[4];
947
+ af[0] = from[0*stride];
948
+ af[1] = from[1*stride];
949
+ af[2] = from[2*stride];
950
+ af[3] = from[3*stride];
951
+ return pload<Packet4f>(af);
952
+ }
953
+
954
+ template<> EIGEN_DEVICE_FUNC inline void pscatter<float, Packet4f>(float* to, const Packet4f& from, Index stride)
955
+ {
956
+ float EIGEN_ALIGN16 af[4];
957
+ pstore<float>((float*)af, from);
958
+ to[0*stride] = af[0];
959
+ to[1*stride] = af[1];
960
+ to[2*stride] = af[2];
961
+ to[3*stride] = af[3];
962
+ }
963
+
964
+ template<> EIGEN_STRONG_INLINE Packet4f padd<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a + b); }
965
+ template<> EIGEN_STRONG_INLINE Packet4f psub<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a - b); }
966
+ template<> EIGEN_STRONG_INLINE Packet4f pmul<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a * b); }
967
+ template<> EIGEN_STRONG_INLINE Packet4f pdiv<Packet4f>(const Packet4f& a, const Packet4f& b) { return (a / b); }
968
+ template<> EIGEN_STRONG_INLINE Packet4f pnegate<Packet4f>(const Packet4f& a) { return (-a); }
969
+ template<> EIGEN_STRONG_INLINE Packet4f pconj<Packet4f> (const Packet4f& a) { return a; }
970
+ template<> EIGEN_STRONG_INLINE Packet4f pmadd<Packet4f> (const Packet4f& a, const Packet4f& b, const Packet4f& c) { return vec_madd(a, b, c); }
971
+ template<> EIGEN_STRONG_INLINE Packet4f pmin<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_min(a, b); }
972
+ template<> EIGEN_STRONG_INLINE Packet4f pmax<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_max(a, b); }
973
+ template<> EIGEN_STRONG_INLINE Packet4f pand<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_and(a, b); }
974
+ template<> EIGEN_STRONG_INLINE Packet4f por<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_or(a, b); }
975
+ template<> EIGEN_STRONG_INLINE Packet4f pxor<Packet4f> (const Packet4f& a, const Packet4f& b) { return vec_xor(a, b); }
976
+ template<> EIGEN_STRONG_INLINE Packet4f pandnot<Packet4f>(const Packet4f& a, const Packet4f& b) { return vec_and(a, vec_nor(b, b)); }
977
+ template<> EIGEN_STRONG_INLINE Packet4f pround<Packet4f> (const Packet4f& a) { return vec_round(a); }
978
+ template<> EIGEN_STRONG_INLINE Packet4f pceil<Packet4f> (const Packet4f& a) { return vec_ceil(a); }
979
+ template<> EIGEN_STRONG_INLINE Packet4f pfloor<Packet4f> (const Packet4f& a) { return vec_floor(a); }
980
+ template<> EIGEN_STRONG_INLINE Packet4f pabs<Packet4f> (const Packet4f& a) { return vec_abs(a); }
981
+ template<> EIGEN_STRONG_INLINE float pfirst<Packet4f>(const Packet4f& a) { float EIGEN_ALIGN16 x[4]; pstore(x, a); return x[0]; }
982
+
983
+ template<> EIGEN_STRONG_INLINE Packet4f ploaddup<Packet4f>(const float* from)
984
+ {
985
+ Packet4f p = pload<Packet4f>(from);
986
+ return vec_perm(p, p, p16uc_DUPLICATE32_HI);
987
+ }
988
+
989
+ template<> EIGEN_STRONG_INLINE Packet4f preverse(const Packet4f& a)
990
+ {
991
+ return reinterpret_cast<Packet4f>(vec_perm(reinterpret_cast<Packet16uc>(a), reinterpret_cast<Packet16uc>(a), p16uc_REVERSE32));
992
+ }
993
+
994
+ template<> EIGEN_STRONG_INLINE float predux<Packet4f>(const Packet4f& a)
995
+ {
996
+ Packet4f b, sum;
997
+ b = vec_sld(a, a, 8);
998
+ sum = padd<Packet4f>(a, b);
999
+ b = vec_sld(sum, sum, 4);
1000
+ sum = padd<Packet4f>(sum, b);
1001
+ return pfirst(sum);
1002
+ }
1003
+
1004
+ // Other reduction functions:
1005
+ // mul
1006
+ template<> EIGEN_STRONG_INLINE float predux_mul<Packet4f>(const Packet4f& a)
1007
+ {
1008
+ Packet4f prod;
1009
+ prod = pmul(a, vec_sld(a, a, 8));
1010
+ return pfirst(pmul(prod, vec_sld(prod, prod, 4)));
1011
+ }
1012
+
1013
+ // min
1014
+ template<> EIGEN_STRONG_INLINE float predux_min<Packet4f>(const Packet4f& a)
1015
+ {
1016
+ Packet4f b, res;
1017
+ b = pmin<Packet4f>(a, vec_sld(a, a, 8));
1018
+ res = pmin<Packet4f>(b, vec_sld(b, b, 4));
1019
+ return pfirst(res);
1020
+ }
1021
+
1022
+ // max
1023
+ template<> EIGEN_STRONG_INLINE float predux_max<Packet4f>(const Packet4f& a)
1024
+ {
1025
+ Packet4f b, res;
1026
+ b = pmax<Packet4f>(a, vec_sld(a, a, 8));
1027
+ res = pmax<Packet4f>(b, vec_sld(b, b, 4));
1028
+ return pfirst(res);
1029
+ }
1030
+
1031
+ EIGEN_DEVICE_FUNC inline void
1032
+ ptranspose(PacketBlock<Packet4f,4>& kernel) {
1033
+ Packet4f t0 = vec_mergeh(kernel.packet[0], kernel.packet[2]);
1034
+ Packet4f t1 = vec_mergel(kernel.packet[0], kernel.packet[2]);
1035
+ Packet4f t2 = vec_mergeh(kernel.packet[1], kernel.packet[3]);
1036
+ Packet4f t3 = vec_mergel(kernel.packet[1], kernel.packet[3]);
1037
+ kernel.packet[0] = vec_mergeh(t0, t2);
1038
+ kernel.packet[1] = vec_mergel(t0, t2);
1039
+ kernel.packet[2] = vec_mergeh(t1, t3);
1040
+ kernel.packet[3] = vec_mergel(t1, t3);
1041
+ }
1042
+
1043
+ template<> EIGEN_STRONG_INLINE Packet4f pblend(const Selector<4>& ifPacket, const Packet4f& thenPacket, const Packet4f& elsePacket) {
1044
+ Packet4ui select = { ifPacket.select[0], ifPacket.select[1], ifPacket.select[2], ifPacket.select[3] };
1045
+ Packet4ui mask = vec_cmpeq(select, reinterpret_cast<Packet4ui>(p4i_ONE));
1046
+ return vec_sel(elsePacket, thenPacket, mask);
1047
+ }
1048
+
1049
+ #endif
1050
+
1051
+ template<> EIGEN_STRONG_INLINE void prefetch<float>(const float* addr) { EIGEN_ZVECTOR_PREFETCH(addr); }
1052
+ template<> EIGEN_STRONG_INLINE Packet4f ploadu<Packet4f> (const float* from) { return pload<Packet4f>(from); }
1053
+ template<> EIGEN_STRONG_INLINE void pstoreu<float>(float* to, const Packet4f& from) { pstore<float>(to, from); }
1054
+ template<> EIGEN_STRONG_INLINE Packet4f plset<Packet4f> (const float& a) { return padd<Packet4f>(pset1<Packet4f>(a), p4f_COUNTDOWN); }
1055
+
1056
+ } // end namespace internal
1057
+
1058
+ } // end namespace Eigen
1059
+
1060
+ #endif // EIGEN_PACKET_MATH_ZVECTOR_H