tomoto 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (369) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +10 -0
  3. data/README.md +8 -10
  4. data/ext/tomoto/ct.cpp +11 -11
  5. data/ext/tomoto/dmr.cpp +14 -13
  6. data/ext/tomoto/dt.cpp +14 -14
  7. data/ext/tomoto/extconf.rb +7 -5
  8. data/ext/tomoto/gdmr.cpp +7 -7
  9. data/ext/tomoto/hdp.cpp +9 -9
  10. data/ext/tomoto/hlda.cpp +13 -13
  11. data/ext/tomoto/hpa.cpp +5 -5
  12. data/ext/tomoto/lda.cpp +42 -39
  13. data/ext/tomoto/llda.cpp +6 -6
  14. data/ext/tomoto/mglda.cpp +15 -15
  15. data/ext/tomoto/pa.cpp +6 -6
  16. data/ext/tomoto/plda.cpp +6 -6
  17. data/ext/tomoto/slda.cpp +8 -8
  18. data/ext/tomoto/{ext.cpp → tomoto.cpp} +8 -8
  19. data/ext/tomoto/utils.h +16 -70
  20. data/lib/tomoto/version.rb +1 -1
  21. data/lib/tomoto.rb +5 -1
  22. data/vendor/EigenRand/EigenRand/Core.h +10 -10
  23. data/vendor/EigenRand/EigenRand/Dists/Basic.h +208 -9
  24. data/vendor/EigenRand/EigenRand/Dists/Discrete.h +52 -31
  25. data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +9 -8
  26. data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +28 -21
  27. data/vendor/EigenRand/EigenRand/EigenRand +11 -6
  28. data/vendor/EigenRand/EigenRand/Macro.h +13 -7
  29. data/vendor/EigenRand/EigenRand/MorePacketMath.h +348 -740
  30. data/vendor/EigenRand/EigenRand/MvDists/Multinomial.h +5 -3
  31. data/vendor/EigenRand/EigenRand/MvDists/MvNormal.h +9 -3
  32. data/vendor/EigenRand/EigenRand/PacketFilter.h +11 -253
  33. data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +21 -47
  34. data/vendor/EigenRand/EigenRand/RandUtils.h +50 -344
  35. data/vendor/EigenRand/EigenRand/arch/AVX/MorePacketMath.h +619 -0
  36. data/vendor/EigenRand/EigenRand/arch/AVX/PacketFilter.h +149 -0
  37. data/vendor/EigenRand/EigenRand/arch/AVX/RandUtils.h +228 -0
  38. data/vendor/EigenRand/EigenRand/arch/NEON/MorePacketMath.h +473 -0
  39. data/vendor/EigenRand/EigenRand/arch/NEON/PacketFilter.h +142 -0
  40. data/vendor/EigenRand/EigenRand/arch/NEON/RandUtils.h +126 -0
  41. data/vendor/EigenRand/EigenRand/arch/SSE/MorePacketMath.h +501 -0
  42. data/vendor/EigenRand/EigenRand/arch/SSE/PacketFilter.h +133 -0
  43. data/vendor/EigenRand/EigenRand/arch/SSE/RandUtils.h +120 -0
  44. data/vendor/EigenRand/EigenRand/doc.h +24 -12
  45. data/vendor/EigenRand/README.md +57 -4
  46. data/vendor/eigen/COPYING.APACHE +203 -0
  47. data/vendor/eigen/COPYING.BSD +1 -1
  48. data/vendor/eigen/COPYING.MINPACK +51 -52
  49. data/vendor/eigen/Eigen/Cholesky +0 -1
  50. data/vendor/eigen/Eigen/Core +112 -265
  51. data/vendor/eigen/Eigen/Eigenvalues +2 -3
  52. data/vendor/eigen/Eigen/Geometry +5 -8
  53. data/vendor/eigen/Eigen/Householder +0 -1
  54. data/vendor/eigen/Eigen/Jacobi +0 -1
  55. data/vendor/eigen/Eigen/KLUSupport +41 -0
  56. data/vendor/eigen/Eigen/LU +2 -5
  57. data/vendor/eigen/Eigen/OrderingMethods +0 -3
  58. data/vendor/eigen/Eigen/PaStiXSupport +1 -0
  59. data/vendor/eigen/Eigen/PardisoSupport +0 -0
  60. data/vendor/eigen/Eigen/QR +2 -3
  61. data/vendor/eigen/Eigen/QtAlignedMalloc +0 -1
  62. data/vendor/eigen/Eigen/SVD +0 -1
  63. data/vendor/eigen/Eigen/Sparse +0 -2
  64. data/vendor/eigen/Eigen/SparseCholesky +0 -8
  65. data/vendor/eigen/Eigen/SparseLU +4 -0
  66. data/vendor/eigen/Eigen/SparseQR +0 -1
  67. data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +42 -27
  68. data/vendor/eigen/Eigen/src/Cholesky/LLT.h +39 -23
  69. data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +90 -47
  70. data/vendor/eigen/Eigen/src/Core/ArithmeticSequence.h +413 -0
  71. data/vendor/eigen/Eigen/src/Core/Array.h +99 -11
  72. data/vendor/eigen/Eigen/src/Core/ArrayBase.h +3 -3
  73. data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +21 -21
  74. data/vendor/eigen/Eigen/src/Core/Assign.h +1 -1
  75. data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +125 -50
  76. data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +10 -10
  77. data/vendor/eigen/Eigen/src/Core/BandMatrix.h +16 -16
  78. data/vendor/eigen/Eigen/src/Core/Block.h +56 -60
  79. data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +29 -31
  80. data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +7 -3
  81. data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +325 -272
  82. data/vendor/eigen/Eigen/src/Core/CoreIterators.h +5 -0
  83. data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +21 -22
  84. data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +153 -18
  85. data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +6 -6
  86. data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +14 -10
  87. data/vendor/eigen/Eigen/src/Core/DenseBase.h +132 -42
  88. data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +25 -21
  89. data/vendor/eigen/Eigen/src/Core/DenseStorage.h +153 -71
  90. data/vendor/eigen/Eigen/src/Core/Diagonal.h +21 -23
  91. data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +50 -2
  92. data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +1 -1
  93. data/vendor/eigen/Eigen/src/Core/Dot.h +10 -10
  94. data/vendor/eigen/Eigen/src/Core/EigenBase.h +10 -9
  95. data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +8 -4
  96. data/vendor/eigen/Eigen/src/Core/Fuzzy.h +3 -3
  97. data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +20 -10
  98. data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +599 -152
  99. data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +40 -33
  100. data/vendor/eigen/Eigen/src/Core/IO.h +40 -7
  101. data/vendor/eigen/Eigen/src/Core/IndexedView.h +237 -0
  102. data/vendor/eigen/Eigen/src/Core/Inverse.h +9 -10
  103. data/vendor/eigen/Eigen/src/Core/Map.h +7 -7
  104. data/vendor/eigen/Eigen/src/Core/MapBase.h +10 -3
  105. data/vendor/eigen/Eigen/src/Core/MathFunctions.h +767 -125
  106. data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +118 -19
  107. data/vendor/eigen/Eigen/src/Core/Matrix.h +131 -25
  108. data/vendor/eigen/Eigen/src/Core/MatrixBase.h +21 -3
  109. data/vendor/eigen/Eigen/src/Core/NestByValue.h +25 -50
  110. data/vendor/eigen/Eigen/src/Core/NoAlias.h +4 -3
  111. data/vendor/eigen/Eigen/src/Core/NumTraits.h +107 -20
  112. data/vendor/eigen/Eigen/src/Core/PartialReduxEvaluator.h +232 -0
  113. data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +3 -31
  114. data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +152 -59
  115. data/vendor/eigen/Eigen/src/Core/Product.h +30 -25
  116. data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +192 -125
  117. data/vendor/eigen/Eigen/src/Core/Random.h +37 -1
  118. data/vendor/eigen/Eigen/src/Core/Redux.h +180 -170
  119. data/vendor/eigen/Eigen/src/Core/Ref.h +121 -23
  120. data/vendor/eigen/Eigen/src/Core/Replicate.h +8 -8
  121. data/vendor/eigen/Eigen/src/Core/Reshaped.h +454 -0
  122. data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +7 -5
  123. data/vendor/eigen/Eigen/src/Core/Reverse.h +18 -12
  124. data/vendor/eigen/Eigen/src/Core/Select.h +8 -6
  125. data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +33 -20
  126. data/vendor/eigen/Eigen/src/Core/Solve.h +14 -14
  127. data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +16 -16
  128. data/vendor/eigen/Eigen/src/Core/SolverBase.h +41 -3
  129. data/vendor/eigen/Eigen/src/Core/StableNorm.h +100 -70
  130. data/vendor/eigen/Eigen/src/Core/StlIterators.h +463 -0
  131. data/vendor/eigen/Eigen/src/Core/Stride.h +9 -4
  132. data/vendor/eigen/Eigen/src/Core/Swap.h +5 -4
  133. data/vendor/eigen/Eigen/src/Core/Transpose.h +88 -27
  134. data/vendor/eigen/Eigen/src/Core/Transpositions.h +26 -47
  135. data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +93 -75
  136. data/vendor/eigen/Eigen/src/Core/VectorBlock.h +5 -5
  137. data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +159 -70
  138. data/vendor/eigen/Eigen/src/Core/Visitor.h +137 -29
  139. data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +50 -129
  140. data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +126 -337
  141. data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +1092 -155
  142. data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +65 -1
  143. data/vendor/eigen/Eigen/src/Core/arch/AVX512/Complex.h +422 -0
  144. data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +207 -236
  145. data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1482 -495
  146. data/vendor/eigen/Eigen/src/Core/arch/AVX512/TypeCasting.h +89 -0
  147. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +152 -165
  148. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +19 -251
  149. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProduct.h +2937 -0
  150. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductCommon.h +221 -0
  151. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MatrixProductMMA.h +629 -0
  152. data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +2042 -392
  153. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +235 -80
  154. data/vendor/eigen/Eigen/src/Core/arch/Default/BFloat16.h +700 -0
  155. data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +102 -14
  156. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctions.h +1649 -0
  157. data/vendor/eigen/Eigen/src/Core/arch/Default/GenericPacketMathFunctionsFwd.h +110 -0
  158. data/vendor/eigen/Eigen/src/Core/arch/Default/Half.h +942 -0
  159. data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +1 -1
  160. data/vendor/eigen/Eigen/src/Core/arch/Default/TypeCasting.h +120 -0
  161. data/vendor/eigen/Eigen/src/Core/arch/{CUDA → GPU}/MathFunctions.h +16 -4
  162. data/vendor/eigen/Eigen/src/Core/arch/GPU/PacketMath.h +1685 -0
  163. data/vendor/eigen/Eigen/src/Core/arch/GPU/TypeCasting.h +80 -0
  164. data/vendor/eigen/Eigen/src/Core/arch/HIP/hcc/math_constants.h +23 -0
  165. data/vendor/eigen/Eigen/src/Core/arch/MSA/Complex.h +648 -0
  166. data/vendor/eigen/Eigen/src/Core/arch/MSA/MathFunctions.h +387 -0
  167. data/vendor/eigen/Eigen/src/Core/arch/MSA/PacketMath.h +1233 -0
  168. data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +313 -219
  169. data/vendor/eigen/Eigen/src/Core/arch/NEON/GeneralBlockPanelKernel.h +183 -0
  170. data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +54 -70
  171. data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +4376 -549
  172. data/vendor/eigen/Eigen/src/Core/arch/NEON/TypeCasting.h +1419 -0
  173. data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +59 -179
  174. data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +65 -428
  175. data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +893 -283
  176. data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +65 -0
  177. data/vendor/eigen/Eigen/src/Core/arch/SVE/MathFunctions.h +44 -0
  178. data/vendor/eigen/Eigen/src/Core/arch/SVE/PacketMath.h +752 -0
  179. data/vendor/eigen/Eigen/src/Core/arch/SVE/TypeCasting.h +49 -0
  180. data/vendor/eigen/Eigen/src/Core/arch/SYCL/InteropHeaders.h +232 -0
  181. data/vendor/eigen/Eigen/src/Core/arch/SYCL/MathFunctions.h +301 -0
  182. data/vendor/eigen/Eigen/src/Core/arch/SYCL/PacketMath.h +670 -0
  183. data/vendor/eigen/Eigen/src/Core/arch/SYCL/SyclMemoryModel.h +694 -0
  184. data/vendor/eigen/Eigen/src/Core/arch/SYCL/TypeCasting.h +85 -0
  185. data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +212 -183
  186. data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +101 -5
  187. data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +510 -395
  188. data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +11 -2
  189. data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +112 -46
  190. data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +31 -30
  191. data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +32 -2
  192. data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +355 -16
  193. data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +1075 -586
  194. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +49 -24
  195. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +41 -35
  196. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +6 -6
  197. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +4 -2
  198. data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +382 -483
  199. data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +22 -5
  200. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +53 -30
  201. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +16 -8
  202. data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +8 -6
  203. data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +4 -4
  204. data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +5 -4
  205. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +33 -27
  206. data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +14 -12
  207. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +36 -34
  208. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +8 -4
  209. data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +13 -10
  210. data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +304 -119
  211. data/vendor/eigen/Eigen/src/Core/util/ConfigureVectorization.h +512 -0
  212. data/vendor/eigen/Eigen/src/Core/util/Constants.h +25 -9
  213. data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +26 -3
  214. data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +29 -9
  215. data/vendor/eigen/Eigen/src/Core/util/IndexedViewHelper.h +186 -0
  216. data/vendor/eigen/Eigen/src/Core/util/IntegralConstant.h +272 -0
  217. data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +8 -1
  218. data/vendor/eigen/Eigen/src/Core/util/Macros.h +709 -246
  219. data/vendor/eigen/Eigen/src/Core/util/Memory.h +222 -52
  220. data/vendor/eigen/Eigen/src/Core/util/Meta.h +355 -77
  221. data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +5 -1
  222. data/vendor/eigen/Eigen/src/Core/util/ReshapedHelper.h +51 -0
  223. data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +8 -5
  224. data/vendor/eigen/Eigen/src/Core/util/SymbolicIndex.h +293 -0
  225. data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +65 -30
  226. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +1 -1
  227. data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +7 -4
  228. data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +2 -2
  229. data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +1 -1
  230. data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +2 -2
  231. data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +2 -2
  232. data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +9 -6
  233. data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +21 -9
  234. data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +77 -43
  235. data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +20 -15
  236. data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +99 -5
  237. data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +4 -4
  238. data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +3 -3
  239. data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +15 -11
  240. data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +1 -1
  241. data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +3 -2
  242. data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +39 -2
  243. data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +70 -14
  244. data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +3 -3
  245. data/vendor/eigen/Eigen/src/Geometry/Scaling.h +23 -5
  246. data/vendor/eigen/Eigen/src/Geometry/Transform.h +88 -67
  247. data/vendor/eigen/Eigen/src/Geometry/Translation.h +6 -12
  248. data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +1 -1
  249. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SIMD.h +168 -0
  250. data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +9 -2
  251. data/vendor/eigen/Eigen/src/Householder/Householder.h +8 -4
  252. data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +123 -48
  253. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +15 -15
  254. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +7 -23
  255. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +5 -22
  256. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +41 -47
  257. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +51 -60
  258. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +70 -20
  259. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +2 -20
  260. data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +11 -9
  261. data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +31 -10
  262. data/vendor/eigen/Eigen/src/KLUSupport/KLUSupport.h +358 -0
  263. data/vendor/eigen/Eigen/src/LU/Determinant.h +35 -19
  264. data/vendor/eigen/Eigen/src/LU/FullPivLU.h +29 -43
  265. data/vendor/eigen/Eigen/src/LU/InverseImpl.h +25 -8
  266. data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +71 -58
  267. data/vendor/eigen/Eigen/src/LU/arch/InverseSize4.h +351 -0
  268. data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +7 -17
  269. data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +297 -277
  270. data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +6 -10
  271. data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +1 -1
  272. data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +9 -7
  273. data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +41 -20
  274. data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +100 -27
  275. data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +59 -22
  276. data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +48 -23
  277. data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +25 -3
  278. data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +183 -63
  279. data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +22 -14
  280. data/vendor/eigen/Eigen/src/SVD/SVDBase.h +83 -22
  281. data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +3 -3
  282. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +17 -9
  283. data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +12 -37
  284. data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +3 -2
  285. data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +16 -0
  286. data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +6 -6
  287. data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +81 -27
  288. data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +25 -57
  289. data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +40 -11
  290. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +11 -15
  291. data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +4 -2
  292. data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +30 -8
  293. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +126 -11
  294. data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +5 -12
  295. data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +13 -1
  296. data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +7 -7
  297. data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +5 -2
  298. data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +8 -0
  299. data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +1 -1
  300. data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +1 -0
  301. data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +162 -12
  302. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +1 -1
  303. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +76 -2
  304. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +2 -2
  305. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +1 -1
  306. data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +1 -1
  307. data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +19 -6
  308. data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +2 -12
  309. data/vendor/eigen/Eigen/src/StlSupport/StdList.h +2 -2
  310. data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +2 -2
  311. data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +6 -8
  312. data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +175 -39
  313. data/vendor/eigen/Eigen/src/misc/lapacke.h +5 -4
  314. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +28 -2
  315. data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +155 -11
  316. data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +626 -242
  317. data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +14 -0
  318. data/vendor/eigen/Eigen/src/plugins/IndexedViewMethods.h +262 -0
  319. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +4 -4
  320. data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +10 -0
  321. data/vendor/eigen/Eigen/src/plugins/ReshapedMethods.h +149 -0
  322. data/vendor/eigen/README.md +2 -0
  323. data/vendor/eigen/bench/btl/README +1 -1
  324. data/vendor/eigen/bench/tensors/README +6 -7
  325. data/vendor/eigen/ci/README.md +56 -0
  326. data/vendor/eigen/demos/mix_eigen_and_c/README +1 -1
  327. data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +213 -158
  328. data/vendor/eigen/unsupported/README.txt +1 -1
  329. data/vendor/tomotopy/README.kr.rst +78 -0
  330. data/vendor/tomotopy/README.rst +75 -0
  331. data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +2 -2
  332. data/vendor/tomotopy/src/Labeling/Phraser.hpp +4 -4
  333. data/vendor/tomotopy/src/TopicModel/CTModel.hpp +7 -3
  334. data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +7 -3
  335. data/vendor/tomotopy/src/TopicModel/DTModel.hpp +6 -3
  336. data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +2 -2
  337. data/vendor/tomotopy/src/TopicModel/HDP.h +1 -0
  338. data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +57 -6
  339. data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +6 -3
  340. data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +3 -2
  341. data/vendor/tomotopy/src/TopicModel/LDA.h +3 -3
  342. data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +5 -5
  343. data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +50 -19
  344. data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +6 -2
  345. data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +3 -2
  346. data/vendor/tomotopy/src/TopicModel/PAModel.hpp +1 -1
  347. data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +6 -2
  348. data/vendor/tomotopy/src/TopicModel/PT.h +3 -1
  349. data/vendor/tomotopy/src/TopicModel/PTModel.hpp +36 -3
  350. data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +6 -3
  351. data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +55 -26
  352. data/vendor/tomotopy/src/Utils/AliasMethod.hpp +5 -4
  353. data/vendor/tomotopy/src/Utils/Dictionary.h +2 -2
  354. data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +36 -1
  355. data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +1 -1
  356. data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +1 -1
  357. data/vendor/tomotopy/src/Utils/exception.h +6 -0
  358. data/vendor/tomotopy/src/Utils/math.h +2 -2
  359. data/vendor/tomotopy/src/Utils/sample.hpp +14 -12
  360. data/vendor/tomotopy/src/Utils/serializer.hpp +30 -5
  361. data/vendor/tomotopy/src/Utils/sse_gamma.h +0 -3
  362. metadata +64 -18
  363. data/vendor/eigen/Eigen/CMakeLists.txt +0 -19
  364. data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +0 -674
  365. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +0 -333
  366. data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +0 -1124
  367. data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +0 -212
  368. data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +0 -161
  369. data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +0 -338
@@ -117,21 +117,36 @@ namespace tomoto
117
117
  template<>
118
118
  struct TwId<TermWeight::one>
119
119
  {
120
- static constexpr char TWID[] = "one\0";
120
+ static constexpr auto twid()
121
+ {
122
+ return serializer::to_key("one\0");
123
+ }
121
124
  };
122
125
 
123
126
  template<>
124
127
  struct TwId<TermWeight::idf>
125
128
  {
126
- static constexpr char TWID[] = "idf\0";
129
+ static constexpr auto twid()
130
+ {
131
+ return serializer::to_key("idf\0");
132
+ }
127
133
  };
128
134
 
129
135
  template<>
130
136
  struct TwId<TermWeight::pmi>
131
137
  {
132
- static constexpr char TWID[] = "pmi\0";
138
+ static constexpr auto twid()
139
+ {
140
+ return serializer::to_key("pmi\0");
141
+ }
133
142
  };
134
143
 
144
+ inline Float floorBit(Float x, int bitsUnderPoint = 8)
145
+ {
146
+ Float s = (1 << bitsUnderPoint);
147
+ return floor(x * s) / s;
148
+ }
149
+
135
150
  // to make HDP friend of LDA for HDPModel::converToLDA
136
151
  template<TermWeight _tw,
137
152
  typename _RandGen,
@@ -169,7 +184,11 @@ namespace tomoto
169
184
  typename>
170
185
  friend class HDPModel;
171
186
 
172
- static constexpr char TMID[] = "LDA\0";
187
+ static constexpr auto tmid()
188
+ {
189
+ return serializer::to_key("LDA\0");
190
+ }
191
+
173
192
  using WeightType = typename std::conditional<_tw == TermWeight::one, int32_t, float>::type;
174
193
 
175
194
  enum { m_flags = _Flags };
@@ -189,7 +208,7 @@ namespace tomoto
189
208
  struct ExtraDocData
190
209
  {
191
210
  std::vector<Vid> vChunkOffset;
192
- Eigen::Matrix<uint32_t, -1, -1> chunkOffsetByDoc;
211
+ Eigen::Matrix<size_t, -1, -1> chunkOffsetByDoc;
193
212
  };
194
213
 
195
214
  ExtraDocData eddTrain;
@@ -261,7 +280,7 @@ namespace tomoto
261
280
  }
262
281
 
263
282
  template<int _inc>
264
- inline void addWordTo(_ModelState& ld, _DocType& doc, uint32_t pid, Vid vid, Tid tid) const
283
+ inline void addWordTo(_ModelState& ld, _DocType& doc, size_t pid, Vid vid, Tid tid) const
265
284
  {
266
285
  assert(tid < K);
267
286
  assert(vid < this->realV);
@@ -469,11 +488,11 @@ namespace tomoto
469
488
  {
470
489
  if (_ps == ParallelScheme::partition)
471
490
  {
472
- return (this->realV + 3) / 4;
491
+ return std::max(((size_t)this->realV + 3) / 4, (size_t)1);
473
492
  }
474
493
  if (_ps == ParallelScheme::copy_merge)
475
494
  {
476
- return (this->docs.size() + 1) / 2;
495
+ return std::max((this->docs.size() + 1) / 2, (size_t)1);
477
496
  }
478
497
  return (size_t)-1;
479
498
  }
@@ -620,7 +639,7 @@ namespace tomoto
620
639
  for (Vid v = 0; v < V; ++v)
621
640
  {
622
641
  if (!ld.numByTopicWord(k, v)) continue;
623
- ll += math::lgammaT(ld.numByTopicWord(k, v) + etaByTopicWord(v, k)) - math::lgammaT(etaByTopicWord(v, k));
642
+ ll += math::lgammaT(ld.numByTopicWord(k, v) + etaByTopicWord(k, v)) - math::lgammaT(etaByTopicWord(k, v));
624
643
  assert(std::isfinite(ll));
625
644
  }
626
645
  }
@@ -972,12 +991,14 @@ namespace tomoto
972
991
 
973
992
  void setOptimInterval(size_t _optimInterval) override
974
993
  {
975
- optimInterval = _optimInterval;
994
+ if (_optimInterval > 0x7FFFFFFF) THROW_ERROR_WITH_INFO(exc::InvalidArgument, "wrong value");
995
+ optimInterval = (uint32_t)_optimInterval;
976
996
  }
977
997
 
978
998
  void setBurnInIteration(size_t iteration) override
979
999
  {
980
- burnIn = iteration;
1000
+ if (iteration > 0x7FFFFFFF) THROW_ERROR_WITH_INFO(exc::InvalidArgument, "wrong value");
1001
+ burnIn = (uint32_t)iteration;
981
1002
  }
982
1003
 
983
1004
  size_t addDoc(const RawDoc& rawDoc, const RawDocTokenizer::Factory& tokenizer) override
@@ -1008,6 +1029,11 @@ namespace tomoto
1008
1029
  if (p < 0) THROW_ERROR_WITH_INFO(exc::InvalidArgument, "priors must not be less than 0.");
1009
1030
  }
1010
1031
  this->dict.add(word);
1032
+ if (this->dict.size() > this->vocabCf.size())
1033
+ {
1034
+ this->vocabCf.resize(this->dict.size());
1035
+ this->vocabDf.resize(this->dict.size());
1036
+ }
1011
1037
  etaByWord.emplace(word, priors);
1012
1038
  }
1013
1039
 
@@ -1037,19 +1063,23 @@ namespace tomoto
1037
1063
  }
1038
1064
  }
1039
1065
 
1040
- void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) override
1066
+ void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0, bool updateStopwords = true) override
1041
1067
  {
1042
- if (initDocs) this->removeStopwords(minWordCnt, minWordDf, removeTopN);
1068
+ if (initDocs && updateStopwords) this->removeStopwords(minWordCnt, minWordDf, removeTopN);
1043
1069
  static_cast<DerivedClass*>(this)->updateWeakArray();
1044
1070
  static_cast<DerivedClass*>(this)->initGlobalState(initDocs);
1045
1071
  static_cast<DerivedClass*>(this)->prepareWordPriors();
1046
1072
 
1047
1073
  const size_t V = this->realV;
1074
+ if (V == 0)
1075
+ {
1076
+ std::cerr << "[warn] No valid vocabs in the model!" << std::endl;
1077
+ }
1048
1078
 
1049
1079
  if (initDocs)
1050
1080
  {
1051
1081
  std::vector<uint32_t> df, cf, tf;
1052
- uint32_t totCf;
1082
+ size_t totCf;
1053
1083
 
1054
1084
  // calculate weighting
1055
1085
  if (_tw != TermWeight::one)
@@ -1064,14 +1094,14 @@ namespace tomoto
1064
1094
  ++df[w];
1065
1095
  }
1066
1096
  }
1067
- totCf = accumulate(this->vocabCf.begin(), this->vocabCf.end(), 0);
1097
+ totCf = std::accumulate(this->vocabCf.begin(), this->vocabCf.end(), 0);
1068
1098
  }
1069
1099
  if (_tw == TermWeight::idf)
1070
1100
  {
1071
1101
  vocabWeights.resize(V);
1072
1102
  for (size_t i = 0; i < V; ++i)
1073
1103
  {
1074
- vocabWeights[i] = log(this->docs.size() / (Float)df[i]);
1104
+ vocabWeights[i] = (Float)log(this->docs.size() / (double)df[i]);
1075
1105
  }
1076
1106
  }
1077
1107
  else if (_tw == TermWeight::pmi)
@@ -1079,7 +1109,7 @@ namespace tomoto
1079
1109
  vocabWeights.resize(V);
1080
1110
  for (size_t i = 0; i < V; ++i)
1081
1111
  {
1082
- vocabWeights[i] = this->vocabCf[i] / (float)totCf;
1112
+ vocabWeights[i] = (Float)(this->vocabCf[i] / (double)totCf);
1083
1113
  }
1084
1114
  }
1085
1115
 
@@ -1096,7 +1126,7 @@ namespace tomoto
1096
1126
  for (auto& doc : this->docs) doc.updateSumWordWeight(this->realV);
1097
1127
  }
1098
1128
  static_cast<DerivedClass*>(this)->prepareShared();
1099
- BaseClass::prepare(initDocs, minWordCnt, minWordDf, removeTopN);
1129
+ BaseClass::prepare(initDocs, minWordCnt, minWordDf, removeTopN, updateStopwords);
1100
1130
  }
1101
1131
 
1102
1132
  std::vector<uint64_t> getCountByTopic() const override
@@ -1104,8 +1134,9 @@ namespace tomoto
1104
1134
  return static_cast<const DerivedClass*>(this)->_getTopicsCount();
1105
1135
  }
1106
1136
 
1107
- std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
1137
+ std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
1108
1138
  {
1139
+ if (!doc.numByTopic.size()) return {};
1109
1140
  std::vector<Float> ret(K);
1110
1141
  Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), K };
1111
1142
  if (normalize)
@@ -26,7 +26,10 @@ namespace tomoto
26
26
  friend typename BaseClass::BaseClass;
27
27
  using WeightType = typename BaseClass::WeightType;
28
28
 
29
- static constexpr char TMID[] = "LLDA";
29
+ static constexpr auto tmid()
30
+ {
31
+ return serializer::to_key("LLDA");
32
+ }
30
33
 
31
34
  Dictionary topicLabelDict;
32
35
 
@@ -171,8 +174,9 @@ namespace tomoto
171
174
  return std::make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<std::string>>("labels")));
172
175
  }
173
176
 
174
- std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
177
+ std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
175
178
  {
179
+ if (!doc.numByTopic.size()) return {};
176
180
  std::vector<Float> ret(this->K);
177
181
  auto maskedAlphas = this->alphas.array() * doc.labelMask.template cast<Float>().array();
178
182
  Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K };
@@ -63,7 +63,7 @@ namespace tomoto
63
63
  }
64
64
 
65
65
  template<int _inc>
66
- inline void addWordTo(_ModelState& ld, _DocType& doc, uint32_t pid, Vid vid, Tid tid, uint16_t s, uint8_t w, uint8_t r) const
66
+ inline void addWordTo(_ModelState& ld, _DocType& doc, size_t pid, Vid vid, Tid tid, uint16_t s, uint8_t w, uint8_t r) const
67
67
  {
68
68
  const auto K = this->K;
69
69
 
@@ -527,8 +527,9 @@ namespace tomoto
527
527
  this->etaByWord.emplace(word, priors);
528
528
  }
529
529
 
530
- std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
530
+ std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
531
531
  {
532
+ if (!doc.numByTopic.size()) return {};
532
533
  std::vector<Float> ret(this->K + KL);
533
534
  Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K + KL };
534
535
  if (normalize)
@@ -90,7 +90,7 @@ namespace tomoto
90
90
  }
91
91
 
92
92
  template<int _inc>
93
- inline void addWordTo(_ModelState& ld, _DocType& doc, uint32_t pid, Vid vid, Tid z1, Tid z2) const
93
+ inline void addWordTo(_ModelState& ld, _DocType& doc, size_t pid, Vid vid, Tid z1, Tid z2) const
94
94
  {
95
95
  assert(vid < this->realV);
96
96
  constexpr bool _dec = _inc < 0 && _tw != TermWeight::one;
@@ -26,7 +26,10 @@ namespace tomoto
26
26
  friend typename BaseClass::BaseClass;
27
27
  using WeightType = typename BaseClass::WeightType;
28
28
 
29
- static constexpr char TMID[] = "PLDA";
29
+ static constexpr auto tmid()
30
+ {
31
+ return serializer::to_key("PLDA");
32
+ }
30
33
 
31
34
  Dictionary topicLabelDict;
32
35
 
@@ -178,8 +181,9 @@ namespace tomoto
178
181
  return std::make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<std::string>>("labels")));
179
182
  }
180
183
 
181
- std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
184
+ std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
182
185
  {
186
+ if (!doc.numByTopic.size()) return {};
183
187
  std::vector<Float> ret(this->K);
184
188
  auto maskedAlphas = this->alphas.array() * doc.labelMask.template cast<Float>().array();
185
189
  Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K };
@@ -18,7 +18,7 @@ namespace tomoto
18
18
 
19
19
  struct PTArgs : public LDAArgs
20
20
  {
21
- size_t p = 100;
21
+ size_t p = 0;
22
22
  Float lambda = 0.01;
23
23
  };
24
24
 
@@ -30,5 +30,7 @@ namespace tomoto
30
30
  bool scalarRng = false);
31
31
 
32
32
  virtual size_t getP() const = 0;
33
+ virtual std::vector<Float> getTopicsFromPseudoDoc(const DocumentBase* doc, bool normalize = true) const = 0;
34
+ virtual std::vector<std::pair<Tid, Float>> getTopicsFromPseudoDocSorted(const DocumentBase* doc, size_t topN) const = 0;
33
35
  };
34
36
  }
@@ -38,7 +38,10 @@ namespace tomoto
38
38
  friend typename BaseClass::BaseClass;
39
39
  using WeightType = typename BaseClass::WeightType;
40
40
 
41
- static constexpr char TMID[] = "PTM";
41
+ static constexpr auto tmid()
42
+ {
43
+ return serializer::to_key("PTM");
44
+ }
42
45
 
43
46
  uint64_t numPDocs;
44
47
  Float lambda;
@@ -72,7 +75,7 @@ namespace tomoto
72
75
  }
73
76
  --ld.numDocsByPDoc[doc.pseudoDoc];
74
77
 
75
- if (pool)
78
+ if (pool && pool->getNumWorkers() > 1)
76
79
  {
77
80
  std::vector<std::future<void>> futures;
78
81
  for (size_t w = 0; w < pool->getNumWorkers(); ++w)
@@ -250,6 +253,16 @@ namespace tomoto
250
253
  }
251
254
  }
252
255
 
256
+ void updateForCopy()
257
+ {
258
+ BaseClass::updateForCopy();
259
+ size_t offset = 0;
260
+ for (auto& doc : this->docs)
261
+ {
262
+ doc.template update<>(this->globalState.numByTopicPDoc.col(doc.pseudoDoc).data(), *static_cast<DerivedClass*>(this));
263
+ }
264
+ }
265
+
253
266
  public:
254
267
  DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 0, numPDocs, lambda);
255
268
  DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 1, 0x00010001, numPDocs, lambda);
@@ -261,8 +274,9 @@ namespace tomoto
261
274
  {
262
275
  }
263
276
 
264
- std::vector<Float> getTopicsByDoc(const _DocType& doc, bool normalize) const
277
+ std::vector<Float> _getTopicsByDoc(const _DocType& doc, bool normalize) const
265
278
  {
279
+ if (doc.Zs.empty()) return {};
266
280
  std::vector<Float> ret(this->K);
267
281
  Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K };
268
282
  m = this->alphas.array();
@@ -277,6 +291,25 @@ namespace tomoto
277
291
  return ret;
278
292
  }
279
293
 
294
+ std::vector<Float> getTopicsFromPseudoDoc(const DocumentBase* _doc, bool normalize) const override
295
+ {
296
+ auto& doc = *static_cast<const _DocType*>(_doc);
297
+ if (!doc.numByTopic.size()) return {};
298
+ std::vector<Float> ret(this->K);
299
+ Eigen::Map<Eigen::Array<Float, -1, 1>> m{ ret.data(), this->K };
300
+ m = doc.numByTopic.array().template cast<Float>() + this->alphas.array();
301
+ if (normalize)
302
+ {
303
+ m /= m.sum();
304
+ }
305
+ return ret;
306
+ }
307
+
308
+ std::vector<std::pair<Tid, Float>> getTopicsFromPseudoDocSorted(const DocumentBase* doc, size_t topN) const override
309
+ {
310
+ return extractTopN<Tid>(getTopicsFromPseudoDoc(doc, true), topN);
311
+ }
312
+
280
313
  void updateDocs()
281
314
  {
282
315
  for (auto& doc : this->docs)
@@ -216,7 +216,10 @@ namespace tomoto
216
216
  friend typename BaseClass::BaseClass;
217
217
  using WeightType = typename BaseClass::WeightType;
218
218
 
219
- static constexpr char TMID[] = "SLDA";
219
+ static constexpr auto tmid()
220
+ {
221
+ return serializer::to_key("SLDA");
222
+ }
220
223
 
221
224
  uint64_t F; // number of response variables
222
225
  std::vector<ISLDAModel::GLM> varTypes;
@@ -376,11 +379,11 @@ namespace tomoto
376
379
  {
377
380
  nuSq = Vector::Ones(F);
378
381
  }
379
- else if (args.mu.size() == 1)
382
+ else if (args.nuSq.size() == 1)
380
383
  {
381
384
  nuSq = Vector::Constant(F, args.nuSq[0]);
382
385
  }
383
- else if (args.mu.size() == F)
386
+ else if (args.nuSq.size() == F)
384
387
  {
385
388
  nuSq = Eigen::Map<const Vector>(args.nuSq.data(), args.nuSq.size());
386
389
  }
@@ -13,7 +13,7 @@
13
13
 
14
14
  namespace tomoto
15
15
  {
16
- using RandGen = Eigen::Rand::P8_mt19937_64<uint32_t>;
16
+ using RandGen = Eigen::Rand::P8_mt19937_64_32;
17
17
  using ScalarRandGen = Eigen::Rand::UniversalRandomEngine<uint32_t, std::mt19937_64>;
18
18
 
19
19
  using Vector = Eigen::Matrix<Float, -1, 1>;
@@ -249,11 +249,12 @@ namespace tomoto
249
249
  virtual size_t getNumDocs() const = 0;
250
250
  virtual const Dictionary& getVocabDict() const = 0;
251
251
  virtual const std::vector<uint64_t>& getVocabCf() const = 0;
252
+ virtual std::vector<double> getVocabWeightedCf() const = 0;
252
253
  virtual const std::vector<uint64_t>& getVocabDf() const = 0;
253
254
 
254
255
  virtual int train(size_t iteration, size_t numWorkers, ParallelScheme ps = ParallelScheme::default_, bool freeze_topics = false) = 0;
255
256
  virtual size_t getGlobalStep() const = 0;
256
- virtual void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) = 0;
257
+ virtual void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0, bool updateStopwords = true) = 0;
257
258
 
258
259
  virtual size_t getK() const = 0;
259
260
  virtual std::vector<Float> getWidsByTopic(size_t tid, bool normalize = true) const = 0;
@@ -319,6 +320,7 @@ namespace tomoto
319
320
  Dictionary dict;
320
321
  uint64_t realV = 0; // vocab size after removing stopwords
321
322
  uint64_t realN = 0; // total word size after removing stopwords
323
+ double weightedN = 0;
322
324
  size_t maxThreads[(size_t)ParallelScheme::size] = { 0, };
323
325
  size_t minWordCf = 0, minWordDf = 0, removeTopN = 0;
324
326
 
@@ -327,15 +329,17 @@ namespace tomoto
327
329
  void _saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const
328
330
  {
329
331
  serializer::writeMany(writer,
330
- serializer::to_keyz(static_cast<const _Derived*>(this)->TMID),
331
- serializer::to_keyz(static_cast<const _Derived*>(this)->TWID));
332
+ serializer::to_keyz(static_cast<const _Derived*>(this)->tmid()),
333
+ serializer::to_keyz(static_cast<const _Derived*>(this)->twid())
334
+ );
332
335
  serializer::writeTaggedMany(writer, 0x00010001,
333
336
  serializer::to_keyz("dict"), dict,
334
337
  serializer::to_keyz("vocabCf"), vocabCf,
335
338
  serializer::to_keyz("vocabDf"), vocabDf,
336
339
  serializer::to_keyz("realV"), realV,
337
340
  serializer::to_keyz("globalStep"), globalStep,
338
- serializer::to_keyz("extra"), extra_data ? *extra_data : std::vector<uint8_t>(0));
341
+ serializer::to_keyz("extra"), extra_data ? *extra_data : std::vector<uint8_t>(0)
342
+ );
339
343
  serializer::writeMany(writer, *static_cast<const _Derived*>(this));
340
344
  globalState.serializerWrite(writer);
341
345
  if (fullModel)
@@ -355,8 +359,9 @@ namespace tomoto
355
359
  {
356
360
  std::vector<uint8_t> extra;
357
361
  serializer::readMany(reader,
358
- serializer::to_keyz(static_cast<_Derived*>(this)->TMID),
359
- serializer::to_keyz(static_cast<_Derived*>(this)->TWID));
362
+ serializer::to_keyz(static_cast<_Derived*>(this)->tmid()),
363
+ serializer::to_keyz(static_cast<_Derived*>(this)->twid())
364
+ );
360
365
  serializer::readTaggedMany(reader, 0x00010001,
361
366
  serializer::to_keyz("dict"), dict,
362
367
  serializer::to_keyz("vocabCf"), vocabCf,
@@ -370,14 +375,17 @@ namespace tomoto
370
375
  {
371
376
  reader.seekg(start_pos);
372
377
  serializer::readMany(reader,
373
- serializer::to_key(static_cast<_Derived*>(this)->TMID),
374
- serializer::to_key(static_cast<_Derived*>(this)->TWID),
375
- dict, vocabCf, realV);
378
+ serializer::to_key(static_cast<_Derived*>(this)->tmid()),
379
+ serializer::to_key(static_cast<_Derived*>(this)->twid()),
380
+ dict, vocabCf, realV
381
+ );
376
382
  }
377
383
  serializer::readMany(reader, *static_cast<_Derived*>(this));
378
384
  globalState.serializerRead(reader);
379
385
  serializer::readMany(reader, docs);
380
- realN = countRealN();
386
+ auto p = countRealN();
387
+ realN = p.first;
388
+ weightedN = p.second;
381
389
  }
382
390
 
383
391
  template<typename _DocTy>
@@ -429,7 +437,7 @@ namespace tomoto
429
437
  }
430
438
  else
431
439
  {
432
- throw exc::InvalidArgument{ "Either `words` or `rawWords` must be filled." };
440
+ throw exc::EmptyWordArgument{ "Either `words` or `rawWords` must be filled." };
433
441
  }
434
442
  return doc;
435
443
  }
@@ -490,17 +498,23 @@ namespace tomoto
490
498
  }
491
499
  }
492
500
 
493
- size_t countRealN() const
501
+ std::pair<size_t, double> countRealN() const
494
502
  {
495
503
  size_t n = 0;
504
+ double weighted = 0;
496
505
  for (auto& doc : docs)
497
506
  {
498
- for (auto& w : doc.words)
507
+ for (size_t i = 0; i < doc.words.size(); ++i)
499
508
  {
500
- if (w < realV) ++n;
509
+ auto w = doc.words[i];
510
+ if (w < realV)
511
+ {
512
+ ++n;
513
+ weighted += doc.wordWeights.empty() ? 1 : doc.wordWeights[i];
514
+ }
501
515
  }
502
516
  }
503
- return n;
517
+ return std::make_pair(n, weighted);
504
518
  }
505
519
 
506
520
  void removeStopwords(size_t minWordCnt, size_t minWordDf, size_t removeTopN)
@@ -544,14 +558,9 @@ namespace tomoto
544
558
  }
545
559
 
546
560
  dict.reorder(order);
547
- realN = 0;
548
561
  for (auto& doc : docs)
549
562
  {
550
- for (auto& w : doc.words)
551
- {
552
- w = order[w];
553
- if (w < realV) ++realN;
554
- }
563
+ for (auto& w : doc.words) w = order[w];
555
564
  }
556
565
  }
557
566
 
@@ -596,8 +605,12 @@ namespace tomoto
596
605
  return empty;
597
606
  }
598
607
 
599
- void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) override
608
+ void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0, bool updateStopwords = true) override
600
609
  {
610
+ auto p = countRealN();
611
+ realN = p.first;
612
+ weightedN = p.second;
613
+
601
614
  maxThreads[(size_t)ParallelScheme::default_] = -1;
602
615
  maxThreads[(size_t)ParallelScheme::none] = -1;
603
616
  maxThreads[(size_t)ParallelScheme::copy_merge] = static_cast<_Derived*>(this)->template estimateMaxThreads<ParallelScheme::copy_merge>();
@@ -661,7 +674,8 @@ namespace tomoto
661
674
  auto state = ps == ParallelScheme::none ? &globalState : localData.data();
662
675
  for (size_t i = 0; i < iteration; ++i)
663
676
  {
664
- while (1)
677
+ size_t retry;
678
+ for (retry = 0; retry < 10; ++retry)
665
679
  {
666
680
  try
667
681
  {
@@ -690,6 +704,7 @@ namespace tomoto
690
704
  if(ret < 0) return ret;
691
705
  }
692
706
  }
707
+ if (retry >= 10) return -1;
693
708
  ++globalStep;
694
709
  }
695
710
  return 0;
@@ -697,7 +712,7 @@ namespace tomoto
697
712
 
698
713
  double getLLPerWord() const override
699
714
  {
700
- return words.empty() ? 0 : static_cast<const _Derived*>(this)->getLL() / realN;
715
+ return words.empty() ? 0 : static_cast<const _Derived*>(this)->getLL() / weightedN;
701
716
  }
702
717
 
703
718
  double getPerplexity() const override
@@ -797,7 +812,7 @@ namespace tomoto
797
812
 
798
813
  std::vector<Float> getTopicsByDoc(const DocumentBase* doc, bool normalize) const override
799
814
  {
800
- return static_cast<const _Derived*>(this)->getTopicsByDoc(*static_cast<const DocType*>(doc), normalize);
815
+ return static_cast<const _Derived*>(this)->_getTopicsByDoc(*static_cast<const DocType*>(doc), normalize);
801
816
  }
802
817
 
803
818
  std::vector<std::pair<Tid, Float>> getTopicsByDocSorted(const DocumentBase* doc, size_t topN) const override
@@ -832,6 +847,20 @@ namespace tomoto
832
847
  return vocabCf;
833
848
  }
834
849
 
850
+ std::vector<double> getVocabWeightedCf() const override
851
+ {
852
+ std::vector<double> ret(realV);
853
+ for (auto& doc : docs)
854
+ {
855
+ for (size_t i = 0; i < doc.words.size(); ++i)
856
+ {
857
+ if (doc.words[i] >= realV) continue;
858
+ ret[doc.words[i]] += doc.wordWeights.empty() ? 1 : doc.wordWeights[i];
859
+ }
860
+ }
861
+ return ret;
862
+ }
863
+
835
864
  const std::vector<uint64_t>& getVocabDf() const override
836
865
  {
837
866
  return vocabDf;
@@ -35,11 +35,12 @@ namespace tomoto
35
35
  bitsize = o.bitsize;
36
36
  if (msize)
37
37
  {
38
- arr = std::make_unique<_Precision[]>(1 << bitsize);
39
- alias = std::make_unique<size_t[]>(1 << bitsize);
38
+ size_t n = (size_t)1 << bitsize;
39
+ arr = std::make_unique<_Precision[]>(n);
40
+ alias = std::make_unique<size_t[]>(n);
40
41
 
41
- std::copy(o.arr.get(), o.arr.get() + (1 << bitsize), arr.get());
42
- std::copy(o.alias.get(), o.alias.get() + (1 << bitsize), alias.get());
42
+ std::copy(o.arr.get(), o.arr.get() + n, arr.get());
43
+ std::copy(o.alias.get(), o.alias.get() + n, alias.get());
43
44
  }
44
45
  return *this;
45
46
  }
@@ -32,7 +32,7 @@ namespace tomoto
32
32
  auto it = dict.find(word);
33
33
  if (it == dict.end())
34
34
  {
35
- dict.emplace(std::make_pair(word, dict.size()));
35
+ dict.emplace(word, (Vid)dict.size());
36
36
  id2word.emplace_back(word);
37
37
  return (Vid)(dict.size() - 1);
38
38
  }
@@ -64,7 +64,7 @@ namespace tomoto
64
64
  serializer::readMany(reader, serializer::to_key("Dict"), id2word);
65
65
  for (size_t i = 0; i < id2word.size(); ++i)
66
66
  {
67
- dict.emplace(id2word[i], i);
67
+ dict.emplace(id2word[i], (Vid)i);
68
68
  }
69
69
  }
70
70
 
@@ -56,7 +56,7 @@ namespace Eigen
56
56
  }
57
57
  }
58
58
  #endif
59
- #if defined(EIGEN_VECTORIZE_SSE2)
59
+ #ifdef EIGEN_VECTORIZE_SSE2
60
60
  #include <xmmintrin.h>
61
61
  #include "sse_gamma.h"
62
62
 
@@ -91,6 +91,41 @@ namespace Eigen
91
91
  }
92
92
  }
93
93
  #endif
94
+ #ifdef EIGEN_VECTORIZE_NEON
95
+ #include <arm_neon.h>
96
+
97
+
98
+ namespace Eigen
99
+ {
100
+ namespace internal
101
+ {
102
+ template<> struct to_int_packet<Packet4f>
103
+ {
104
+ typedef Packet4i type;
105
+ };
106
+
107
+ template<> struct to_float_packet<Packet4i>
108
+ {
109
+ typedef Packet4f type;
110
+ };
111
+
112
+ EIGEN_STRONG_INLINE Packet4f p_to_f32(const Packet4i& a)
113
+ {
114
+ return vcvtq_f32_s32(a);
115
+ }
116
+
117
+ EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4f& a)
118
+ {
119
+ return vcvtq_f32_s32(vandq_s32(a, vdupq_n_s32(1)));
120
+ }
121
+
122
+ EIGEN_STRONG_INLINE Packet4f p_bool2float(const Packet4i& a)
123
+ {
124
+ return p_bool2float((Packet4f)vreinterpretq_f32_s32((int32x4_t)a));
125
+ }
126
+ }
127
+ }
128
+ #endif
94
129
 
95
130
  namespace Eigen
96
131
  {