tomoto 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +123 -0
- data/ext/tomoto/ext.cpp +245 -0
- data/ext/tomoto/extconf.rb +28 -0
- data/lib/tomoto.rb +12 -0
- data/lib/tomoto/ct.rb +11 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/lda.rb +67 -0
- data/lib/tomoto/version.rb +3 -0
- data/vendor/EigenRand/EigenRand/Core.h +1139 -0
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
- data/vendor/EigenRand/EigenRand/EigenRand +19 -0
- data/vendor/EigenRand/EigenRand/Macro.h +24 -0
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
- data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
- data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
- data/vendor/EigenRand/EigenRand/doc.h +220 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +288 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +52 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
- data/vendor/eigen/Eigen/Cholesky +46 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +537 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +61 -0
- data/vendor/eigen/Eigen/Geometry +62 -0
- data/vendor/eigen/Eigen/Householder +30 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +33 -0
- data/vendor/eigen/Eigen/LU +50 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +73 -0
- data/vendor/eigen/Eigen/PaStiXSupport +48 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +51 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +51 -0
- data/vendor/eigen/Eigen/Sparse +36 -0
- data/vendor/eigen/Eigen/SparseCholesky +45 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +46 -0
- data/vendor/eigen/Eigen/SparseQR +37 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
- data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
- data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
- data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
- data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
- data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
- data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
- data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
- data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
- data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
- data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
- data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
- data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
- data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
- data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
- data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
- data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
- data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
- data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
- data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
- data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
- data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
- data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
- data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
- data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
- data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
- data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
- data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
- data/vendor/eigen/README.md +3 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +21 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +375 -0
- data/vendor/tomotopy/README.rst +382 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
- data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
- data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
- data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
- data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
- data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
- data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
- data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
- data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
- data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
- data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
- data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
- data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
- data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
- data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
- data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
- data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
- data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
- data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
- data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
- data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
- data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
- data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
- data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
- data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
- data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
- data/vendor/tomotopy/src/Utils/exception.h +28 -0
- data/vendor/tomotopy/src/Utils/math.h +281 -0
- data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
- data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
- data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
- data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
- data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
- data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
- data/vendor/tomotopy/src/Utils/text.hpp +49 -0
- data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
- metadata +531 -0
@@ -0,0 +1,144 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include "TopicModel.hpp"
|
3
|
+
|
4
|
+
namespace tomoto
|
5
|
+
{
|
6
|
+
// Selects how individual word occurrences are weighted by the LDA-family models.
// The enumerator order (and therefore the underlying values 0..3) is unchanged.
enum class TermWeight
{
	one,	// each occurrence counts as exactly 1 (DocumentLDA::getWordWeight returns 1 for this scheme)
	idf,	// presumably inverse-document-frequency weighting -- TODO confirm against the model implementation
	pmi,	// presumably pointwise-mutual-information weighting -- TODO confirm against the model implementation
	size	// NOTE(review): looks like a "number of schemes" sentinel rather than a usable scheme -- confirm
};
|
7
|
+
|
8
|
+
template<typename _Scalar>
|
9
|
+
struct ShareableVector : Eigen::Map<Eigen::Matrix<_Scalar, -1, 1>>
|
10
|
+
{
|
11
|
+
Eigen::Matrix<_Scalar, -1, 1> ownData;
|
12
|
+
ShareableVector(_Scalar* ptr = nullptr, Eigen::Index len = 0)
|
13
|
+
: Eigen::Map<Eigen::Matrix<_Scalar, -1, 1>>(nullptr, 0)
|
14
|
+
{
|
15
|
+
init(ptr, len);
|
16
|
+
}
|
17
|
+
|
18
|
+
void init(_Scalar* ptr, Eigen::Index len)
|
19
|
+
{
|
20
|
+
if (!ptr && len)
|
21
|
+
{
|
22
|
+
ownData = Eigen::Matrix<_Scalar, -1, 1>::Zero(len);
|
23
|
+
ptr = ownData.data();
|
24
|
+
}
|
25
|
+
// is this the best way??
|
26
|
+
this->m_data = ptr;
|
27
|
+
((Eigen::internal::variable_if_dynamic<Eigen::Index, -1>*)&this->m_rows)->setValue(len);
|
28
|
+
}
|
29
|
+
|
30
|
+
void conservativeResize(size_t newSize)
|
31
|
+
{
|
32
|
+
ownData.conservativeResize(newSize);
|
33
|
+
init(ownData.data(), ownData.size());
|
34
|
+
}
|
35
|
+
|
36
|
+
void becomeOwner()
|
37
|
+
{
|
38
|
+
if (ownData.data() != this->m_data)
|
39
|
+
{
|
40
|
+
ownData = *this;
|
41
|
+
init(ownData.data(), ownData.size());
|
42
|
+
}
|
43
|
+
}
|
44
|
+
};
|
45
|
+
|
46
|
+
template<typename _Base, TermWeight _tw>
|
47
|
+
struct SumWordWeight
|
48
|
+
{
|
49
|
+
Float sumWordWeight = 0;
|
50
|
+
Float getSumWordWeight() const
|
51
|
+
{
|
52
|
+
return sumWordWeight;
|
53
|
+
}
|
54
|
+
|
55
|
+
void updateSumWordWeight(size_t realV)
|
56
|
+
{
|
57
|
+
sumWordWeight = std::accumulate(static_cast<_Base*>(this)->wordWeights.begin(), static_cast<_Base*>(this)->wordWeights.end(), 0.f);
|
58
|
+
}
|
59
|
+
};
|
60
|
+
|
61
|
+
template<typename _Base>
|
62
|
+
struct SumWordWeight<_Base, TermWeight::one>
|
63
|
+
{
|
64
|
+
int32_t sumWordWeight = 0;
|
65
|
+
int32_t getSumWordWeight() const
|
66
|
+
{
|
67
|
+
return sumWordWeight;
|
68
|
+
}
|
69
|
+
|
70
|
+
void updateSumWordWeight(size_t realV)
|
71
|
+
{
|
72
|
+
sumWordWeight = std::count_if(static_cast<_Base*>(this)->words.begin(), static_cast<_Base*>(this)->words.end(), [realV](Vid w)
|
73
|
+
{
|
74
|
+
return w < realV;
|
75
|
+
});
|
76
|
+
}
|
77
|
+
};
|
78
|
+
|
79
|
+
template<TermWeight _tw>
|
80
|
+
struct DocumentLDA : public DocumentBase, SumWordWeight<DocumentLDA<_tw>, _tw>
|
81
|
+
{
|
82
|
+
public:
|
83
|
+
using DocumentBase::DocumentBase;
|
84
|
+
using WeightType = typename std::conditional<_tw == TermWeight::one, int32_t, float>::type;
|
85
|
+
|
86
|
+
tvector<Tid> Zs;
|
87
|
+
tvector<Float> wordWeights;
|
88
|
+
ShareableVector<WeightType> numByTopic;
|
89
|
+
|
90
|
+
DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(DocumentBase, 0, Zs, wordWeights);
|
91
|
+
DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(DocumentBase, 1, 0x00010001, Zs, wordWeights);
|
92
|
+
|
93
|
+
template<typename _TopicModel> void update(WeightType* ptr, const _TopicModel& mdl);
|
94
|
+
|
95
|
+
WeightType getWordWeight(size_t idx) const
|
96
|
+
{
|
97
|
+
return _tw == TermWeight::one ? 1 : wordWeights[idx];
|
98
|
+
}
|
99
|
+
|
100
|
+
std::vector<Float> getCountVector(size_t V) const
|
101
|
+
{
|
102
|
+
std::vector<Float> vs(V);
|
103
|
+
for (size_t i = 0; i < words.size(); ++i)
|
104
|
+
{
|
105
|
+
if (words[i] >= V) continue;
|
106
|
+
vs[words[i]] += wordWeights.empty() ? 1.f : wordWeights[i];
|
107
|
+
}
|
108
|
+
return vs;
|
109
|
+
}
|
110
|
+
};
|
111
|
+
|
112
|
+
class ILDAModel : public ITopicModel
|
113
|
+
{
|
114
|
+
public:
|
115
|
+
using DefaultDocType = DocumentLDA<TermWeight::one>;
|
116
|
+
static ILDAModel* create(TermWeight _weight, size_t _K = 1,
|
117
|
+
Float _alpha = 0.1, Float _eta = 0.01, size_t seed = std::random_device{}(),
|
118
|
+
bool scalarRng = false);
|
119
|
+
|
120
|
+
virtual size_t addDoc(const std::vector<std::string>& words) = 0;
|
121
|
+
virtual std::unique_ptr<DocumentBase> makeDoc(const std::vector<std::string>& words) const = 0;
|
122
|
+
|
123
|
+
virtual size_t addDoc(const std::string& rawStr, const RawDocTokenizer::Factory& tokenizer) = 0;
|
124
|
+
virtual std::unique_ptr<DocumentBase> makeDoc(const std::string& rawStr, const RawDocTokenizer::Factory& tokenizer) const = 0;
|
125
|
+
|
126
|
+
virtual size_t addDoc(const std::string& rawStr, const std::vector<Vid>& words,
|
127
|
+
const std::vector<uint32_t>& pos, const std::vector<uint16_t>& len) = 0;
|
128
|
+
virtual std::unique_ptr<DocumentBase> makeDoc(const std::string& rawStr, const std::vector<Vid>& words,
|
129
|
+
const std::vector<uint32_t>& pos, const std::vector<uint16_t>& len) const = 0;
|
130
|
+
|
131
|
+
virtual TermWeight getTermWeight() const = 0;
|
132
|
+
virtual size_t getOptimInterval() const = 0;
|
133
|
+
virtual void setOptimInterval(size_t) = 0;
|
134
|
+
virtual size_t getBurnInIteration() const = 0;
|
135
|
+
virtual void setBurnInIteration(size_t) = 0;
|
136
|
+
virtual std::vector<uint64_t> getCountByTopic() const = 0;
|
137
|
+
virtual Float getAlpha() const = 0;
|
138
|
+
virtual Float getAlpha(size_t k) const = 0;
|
139
|
+
virtual Float getEta() const = 0;
|
140
|
+
|
141
|
+
virtual std::vector<Float> getWordPrior(const std::string& word) const = 0;
|
142
|
+
virtual void setWordPrior(const std::string& word, const std::vector<Float>& priors) = 0;
|
143
|
+
};
|
144
|
+
}
|
@@ -0,0 +1,442 @@
|
|
1
|
+
#pragma once
|
2
|
+
#include <unordered_set>
|
3
|
+
#include <numeric>
|
4
|
+
#include "TopicModel.hpp"
|
5
|
+
#include <Eigen/Dense>
|
6
|
+
#include "../Utils/Utils.hpp"
|
7
|
+
#include "../Utils/math.h"
|
8
|
+
#include "../Utils/sample.hpp"
|
9
|
+
|
10
|
+
/*
|
11
|
+
Implementation of LDA using Collapsed Variational Bayes zero-order estimation by bab2min
|
12
|
+
|
13
|
+
* Blei, D. M., Ng, A. Y., & Jordan, M. I. (2003). Latent dirichlet allocation. Journal of machine Learning research, 3(Jan), 993-1022.
|
14
|
+
|
15
|
+
The term weighting scheme is based on the following paper:
|
16
|
+
* Wilson, A. T., & Chew, P. A. (2010, June). Term weighting schemes for latent dirichlet allocation. In human language technologies: The 2010 annual conference of the North American Chapter of the Association for Computational Linguistics (pp. 465-473). Association for Computational Linguistics.
|
17
|
+
|
18
|
+
*/
|
19
|
+
|
20
|
+
// Expands to an overriding const accessor `type get<name>() const` that returns `field`.
#define GETTER(name, type, field) \
	type get##name() const override { return field; }
|
21
|
+
namespace tomoto
|
22
|
+
{
|
23
|
+
struct DocumentLDACVB0 : public DocumentBase
|
24
|
+
{
|
25
|
+
public:
|
26
|
+
using DocumentBase::DocumentBase;
|
27
|
+
|
28
|
+
Eigen::MatrixXf Zs;
|
29
|
+
Eigen::VectorXf numByTopic;
|
30
|
+
|
31
|
+
DEFINE_SERIALIZER_AFTER_BASE(DocumentBase, Zs);
|
32
|
+
|
33
|
+
template<typename _TopicModel> void update(Float* ptr, const _TopicModel& mdl);
|
34
|
+
|
35
|
+
int32_t getSumWordWeight() const
|
36
|
+
{
|
37
|
+
return this->words.size();
|
38
|
+
}
|
39
|
+
};
|
40
|
+
|
41
|
+
struct ModelStateLDACVB0
|
42
|
+
{
|
43
|
+
Eigen::VectorXf zLikelihood;
|
44
|
+
Eigen::VectorXf numByTopic;
|
45
|
+
Eigen::MatrixXf numByTopicWord;
|
46
|
+
|
47
|
+
DEFINE_SERIALIZER(numByTopic, numByTopicWord);
|
48
|
+
};
|
49
|
+
|
50
|
+
class ILDACVB0Model : public ITopicModel
|
51
|
+
{
|
52
|
+
public:
|
53
|
+
using DefaultDocType = DocumentLDACVB0;
|
54
|
+
static ILDACVB0Model* create(size_t _K = 1, Float _alpha = 0.1, Float _eta = 0.01, size_t _rg = std::random_device{}());
|
55
|
+
|
56
|
+
virtual size_t addDoc(const std::vector<std::string>& words) = 0;
|
57
|
+
virtual std::unique_ptr<DocumentBase> makeDoc(const std::vector<std::string>& words) const = 0;
|
58
|
+
TermWeight getTermWeight() const { return TermWeight::one; };
|
59
|
+
virtual size_t getOptimInterval() const = 0;
|
60
|
+
virtual void setOptimInterval(size_t) = 0;
|
61
|
+
virtual void setBurnInIteration(size_t) {}
|
62
|
+
virtual std::vector<size_t> getCountByTopic() const = 0;
|
63
|
+
virtual size_t getK() const = 0;
|
64
|
+
virtual Float getAlpha() const = 0;
|
65
|
+
virtual Float getEta() const = 0;
|
66
|
+
|
67
|
+
virtual std::vector<Float> getWordPrior(const std::string& word) const { return {}; }
|
68
|
+
virtual void setWordPrior(const std::string& word, const std::vector<Float>& priors) {}
|
69
|
+
};
|
70
|
+
|
71
|
+
template<typename _Interface = ILDACVB0Model,
|
72
|
+
typename _Derived = void,
|
73
|
+
typename _DocType = DocumentLDACVB0,
|
74
|
+
typename _ModelState = ModelStateLDACVB0>
|
75
|
+
class LDACVB0Model : public TopicModel<0, _Interface,
|
76
|
+
typename std::conditional<std::is_same<_Derived, void>::value, LDACVB0Model<>, _Derived>::type,
|
77
|
+
_DocType, _ModelState>
|
78
|
+
{
|
79
|
+
protected:
|
80
|
+
using DerivedClass = typename std::conditional<std::is_same<_Derived, void>::value, LDACVB0Model, _Derived>::type;
|
81
|
+
using BaseClass = TopicModel<0, _Interface, DerivedClass, _DocType, _ModelState>;
|
82
|
+
friend BaseClass;
|
83
|
+
|
84
|
+
static constexpr const char TWID[] = "one\0";
|
85
|
+
// Tag string for this model (presumably the topic-model ID consumed by the base class — confirm).
static constexpr char TMID[] = "LDA\0";
|
86
|
+
|
87
|
+
Float alpha;
|
88
|
+
Eigen::Matrix<Float, -1, 1> alphas;
|
89
|
+
Float eta;
|
90
|
+
Tid K;
|
91
|
+
size_t optimInterval = 50;
|
92
|
+
|
93
|
+
template<typename _List>
|
94
|
+
static Float calcDigammaSum(_List list, size_t len, Float alpha)
|
95
|
+
{
|
96
|
+
auto listExpr = Eigen::Matrix<Float, -1, 1>::NullaryExpr(len, list);
|
97
|
+
auto dAlpha = math::digammaT(alpha);
|
98
|
+
return (math::digammaApprox(listExpr.array() + alpha) - dAlpha).sum();
|
99
|
+
}
|
100
|
+
|
101
|
+
// Re-estimates the per-topic alphas with five rounds of a multiplicative update
// built from digamma sums over all documents. Each alpha is floored at 1e-5.
// `pool` and `localData` are not used by this implementation.
void optimizeParameters(ThreadPool& pool, _ModelState* localData)
{
    const auto K = this->K;
    const size_t numDocs = this->docs.size();
    for (size_t round = 0; round < 5; ++round)
    {
        // The denominator uses the alphas as they stand at the start of the round.
        const Float denom = calcDigammaSum(
            [&](size_t d) { return this->docs[d].getSumWordWeight(); },
            numDocs, alphas.sum());
        for (size_t k = 0; k < K; ++k)
        {
            const Float nom = calcDigammaSum(
                [&](size_t d) { return this->docs[d].numByTopic[k]; },
                numDocs, alphas(k));
            alphas(k) = std::max(nom / denom * alphas(k), 1e-5f);
        }
    }
}
|
114
|
+
|
115
|
+
// Computes the normalised topic weights for vocabulary id `vid` in `doc` and
// stores them in ld.zLikelihood, which is also returned by reference.
// weight(k) is proportional to
//   (doc.numByTopic(k) + alphas(k)) * (numByTopicWord(k, vid) + eta) / (numByTopic(k) + V * eta)
// `docId` is not used here.
const Eigen::VectorXf& getZLikelihoods(_ModelState& ld, const _DocType& doc, size_t docId, size_t vid) const
{
    const size_t V = this->realV;
    assert(vid < V);
    auto& scores = ld.zLikelihood;
    scores = (doc.numByTopic.array().template cast<Float>() + alphas.array())
        * (ld.numByTopicWord.col(vid).array().template cast<Float>() + eta)
        / (ld.numByTopic.array().template cast<Float>() + V * eta);
    // The small constant guards against division by zero when all weights vanish.
    scores /= scores.sum() + 1e-10;
    return scores;
}
|
126
|
+
|
127
|
+
template<int _Inc, typename _Vec>
|
128
|
+
inline void addWordTo(_ModelState& ld, _DocType& doc, uint32_t pid, Vid vid, _Vec tDist) const
|
129
|
+
{
|
130
|
+
assert(vid < this->realV);
|
131
|
+
constexpr bool _dec = _Inc < 0;
|
132
|
+
doc.numByTopic += _Inc * tDist;
|
133
|
+
if (_dec) doc.numByTopic = doc.numByTopic.cwiseMax(0);
|
134
|
+
ld.numByTopic += _Inc * tDist;
|
135
|
+
if (_dec) ld.numByTopic = ld.numByTopic.cwiseMax(0);
|
136
|
+
ld.numByTopicWord.col(vid) += _Inc * tDist;
|
137
|
+
if (_dec) ld.numByTopicWord.col(vid) = ld.numByTopicWord.col(vid).cwiseMax(0);
|
138
|
+
}
|
139
|
+
|
140
|
+
template<ParallelScheme _ps, bool _infer, typename _ExtraDocData>
|
141
|
+
void sampleDocument(_DocType& doc, const _ExtraDocData& edd, size_t docId, _ModelState& ld, _RandGen& rgs, size_t iterationCnt, size_t partitionId = 0) const
|
142
|
+
{
|
143
|
+
for (size_t w = 0; w < doc.words.size(); ++w)
|
144
|
+
{
|
145
|
+
if (doc.words[w] >= this->realV) continue;
|
146
|
+
addWordTo<-1>(ld, doc, w, doc.words[w], doc.Zs.col(w));
|
147
|
+
doc.Zs.col(w) = static_cast<const DerivedClass*>(this)->getZLikelihoods(ld, doc, docId, doc.words[w]);
|
148
|
+
addWordTo<1>(ld, doc, w, doc.words[w], doc.Zs.col(w));
|
149
|
+
}
|
150
|
+
}
|
151
|
+
|
152
|
+
template<typename _DocIter, typename _ExtraDocData>
|
153
|
+
void updatePartition(ThreadPool& pool, _ModelState* localData, _DocIter first, _DocIter last, _ExtraDocData& edd)
|
154
|
+
{
|
155
|
+
}
|
156
|
+
|
157
|
+
template<ParallelScheme _ps>
|
158
|
+
void trainOne(ThreadPool& pool, _ModelState* localData, _RandGen* rgs)
|
159
|
+
{
|
160
|
+
std::vector<std::future<void>> res;
|
161
|
+
const size_t chStride = std::min(pool.getNumWorkers() * 8, this->docs.size());
|
162
|
+
for (size_t ch = 0; ch < chStride; ++ch)
|
163
|
+
{
|
164
|
+
res.emplace_back(pool.enqueue([&, this, ch, chStride](size_t threadId)
|
165
|
+
{
|
166
|
+
forRandom((this->docs.size() - 1 - ch) / chStride + 1, rgs[threadId](), [&, this](size_t id)
|
167
|
+
{
|
168
|
+
static_cast<DerivedClass*>(this)->template sampleDocument<ParallelScheme::copy_merge>(
|
169
|
+
this->docs[id * chStride + ch], 0, id * chStride + ch,
|
170
|
+
localData[threadId], rgs[threadId], this->globalStep);
|
171
|
+
});
|
172
|
+
}));
|
173
|
+
}
|
174
|
+
for (auto& r : res) r.get();
|
175
|
+
static_cast<DerivedClass*>(this)->updateGlobalInfo(pool, localData);
|
176
|
+
static_cast<DerivedClass*>(this)->mergeState(pool, this->globalState, this->tState, localData);
|
177
|
+
if (this->globalStep >= 250 && optimInterval && (this->globalStep + 1) % optimInterval == 0)
|
178
|
+
{
|
179
|
+
static_cast<DerivedClass*>(this)->optimizeParameters(pool, localData);
|
180
|
+
}
|
181
|
+
}
|
182
|
+
|
183
|
+
// Rebuilds the global statistics from the per-document topic weights and then
// copies the refreshed global state into every worker's local state.
void updateGlobalInfo(ThreadPool& pool, _ModelState* localData)
{
    auto& global = this->globalState;
    global.numByTopic.setZero();
    global.numByTopicWord.setZero();
    for (auto& doc : this->docs)
    {
        doc.numByTopic = doc.Zs.rowwise().sum();
        global.numByTopic += doc.numByTopic;
        for (size_t i = 0; i < doc.words.size(); ++i)
        {
            // numByTopicWord only has realV columns; words outside that range are
            // skipped everywhere else too, so indexing them here would be out of range.
            if (doc.words[i] >= this->realV) continue;
            global.numByTopicWord.col(doc.words[i]) += doc.Zs.col(i);
        }
    }

    std::vector<std::future<void>> res;
    for (size_t i = 0; i < pool.getNumWorkers(); ++i)
    {
        // Each task writes only its own slot localData[i].
        res.emplace_back(pool.enqueue([&, i](size_t threadId)
        {
            localData[i] = this->globalState;
        }));
    }
    for (auto& r : res) r.get();
}
|
208
|
+
|
209
|
+
// Intentionally a no-op: trainOne() calls updateGlobalInfo() right before this,
// and that already rebuilds the global state from the documents.
void mergeState(ThreadPool& pool, _ModelState& globalState, _ModelState& tState, _ModelState* localData) const
{
    // nothing to merge
}
|
212
|
+
|
213
|
+
template<typename _DocIter>
|
214
|
+
double getLLDocs(_DocIter _first, _DocIter _last) const
|
215
|
+
{
|
216
|
+
double ll = 0;
|
217
|
+
// doc-topic distribution
|
218
|
+
ll += (math::lgammaT(K*alpha) - math::lgammaT(alpha)*K) * std::distance(_first, _last);
|
219
|
+
for (; _first != _last; ++_first)
|
220
|
+
{
|
221
|
+
auto& doc = *_first;
|
222
|
+
ll -= math::lgammaT(doc.getSumWordWeight() + K * alpha);
|
223
|
+
for (Tid k = 0; k < K; ++k)
|
224
|
+
{
|
225
|
+
ll += math::lgammaT(doc.numByTopic[k] + alpha);
|
226
|
+
}
|
227
|
+
}
|
228
|
+
return ll;
|
229
|
+
}
|
230
|
+
|
231
|
+
double getLLRest(const _ModelState& ld) const
|
232
|
+
{
|
233
|
+
double ll = 0;
|
234
|
+
const size_t V = this->realV;
|
235
|
+
// topic-word distribution
|
236
|
+
// it has the very-small-value problem
|
237
|
+
ll += (math::lgammaT(V*eta) - math::lgammaT(eta)*V) * K;
|
238
|
+
for (Tid k = 0; k < K; ++k)
|
239
|
+
{
|
240
|
+
ll -= math::lgammaT(ld.numByTopic[k] + V * eta);
|
241
|
+
for (Vid v = 0; v < V; ++v)
|
242
|
+
{
|
243
|
+
ll += math::lgammaT(ld.numByTopicWord(k, v) + eta);
|
244
|
+
}
|
245
|
+
}
|
246
|
+
return ll;
|
247
|
+
}
|
248
|
+
|
249
|
+
double getLL() const
|
250
|
+
{
|
251
|
+
return static_cast<const DerivedClass*>(this)->template getLLDocs<>(this->docs.begin(), this->docs.end())
|
252
|
+
+ static_cast<const DerivedClass*>(this)->getLLRest(this->globalState);
|
253
|
+
}
|
254
|
+
|
255
|
+
// Hook called at the end of prepare(); this model has nothing to set up.
void prepareShared()
{
    // nothing to do
}
|
258
|
+
|
259
|
+
// Allocates and zeroes the per-document buffers: Zs as K x wordSize and
// numByTopic with K entries. `topicDocPtr` is not used.
void prepareDoc(_DocType& doc, Float* topicDocPtr, size_t wordSize) const
{
    doc.Zs = Eigen::MatrixXf::Zero(K, wordSize);
    doc.numByTopic = Eigen::VectorXf::Zero(K);
}
|
264
|
+
|
265
|
+
void initGlobalState(bool initDocs)
|
266
|
+
{
|
267
|
+
const size_t V = this->realV;
|
268
|
+
this->globalState.zLikelihood = Eigen::Matrix<Float, -1, 1>::Zero(K);
|
269
|
+
if (initDocs)
|
270
|
+
{
|
271
|
+
this->globalState.numByTopic = Eigen::Matrix<Float, -1, 1>::Zero(K);
|
272
|
+
this->globalState.numByTopicWord = Eigen::Matrix<Float, -1, -1>::Zero(K, V);
|
273
|
+
}
|
274
|
+
}
|
275
|
+
|
276
|
+
struct Generator
|
277
|
+
{
|
278
|
+
std::uniform_int_distribution<Tid> theta;
|
279
|
+
};
|
280
|
+
|
281
|
+
Generator makeGeneratorForInit(const _DocType*) const
|
282
|
+
{
|
283
|
+
return Generator{ std::uniform_int_distribution<Tid>{0, (Tid)(K - 1)} };
|
284
|
+
}
|
285
|
+
|
286
|
+
template<bool _Infer>
|
287
|
+
void updateStateWithDoc(Generator& g, _ModelState& ld, _RandGen& rgs, _DocType& doc, size_t i) const
|
288
|
+
{
|
289
|
+
doc.Zs.col(i).setZero();
|
290
|
+
doc.Zs(g.theta(rgs), i) = 1;
|
291
|
+
addWordTo<1>(ld, doc, i, doc.words[i], doc.Zs.col(i));
|
292
|
+
}
|
293
|
+
|
294
|
+
template<bool _Infer, typename _Generator>
|
295
|
+
void initializeDocState(_DocType& doc, Float* topicDocPtr, _Generator& g, _ModelState& ld, _RandGen& rgs) const
|
296
|
+
{
|
297
|
+
std::vector<uint32_t> tf(this->realV);
|
298
|
+
static_cast<const DerivedClass*>(this)->prepareDoc(doc, topicDocPtr, doc.words.size());
|
299
|
+
|
300
|
+
for (size_t i = 0; i < doc.words.size(); ++i)
|
301
|
+
{
|
302
|
+
if (doc.words[i] >= this->realV) continue;
|
303
|
+
static_cast<const DerivedClass*>(this)->template updateStateWithDoc<_Infer>(g, ld, rgs, doc, i);
|
304
|
+
}
|
305
|
+
}
|
306
|
+
|
307
|
+
std::vector<uint64_t> _getTopicsCount() const
|
308
|
+
{
|
309
|
+
Eigen::VectorXf cnt = Eigen::VectorXf::Zero(K);
|
310
|
+
for (auto& doc : this->docs)
|
311
|
+
{
|
312
|
+
cnt += doc.Zs.rowwise().sum();
|
313
|
+
}
|
314
|
+
|
315
|
+
return { cnt.data(), cnt.data() + K };
|
316
|
+
}
|
317
|
+
|
318
|
+
template<ParallelScheme _ps>
|
319
|
+
size_t estimateMaxThreads() const
|
320
|
+
{
|
321
|
+
if (_ps == ParallelScheme::partition)
|
322
|
+
{
|
323
|
+
return this->realV / 4;
|
324
|
+
}
|
325
|
+
if (_ps == ParallelScheme::copy_merge)
|
326
|
+
{
|
327
|
+
return this->docs.size() / 2;
|
328
|
+
}
|
329
|
+
return (size_t)-1;
|
330
|
+
}
|
331
|
+
|
332
|
+
DEFINE_SERIALIZER(alpha, eta, K);
|
333
|
+
|
334
|
+
public:
|
335
|
+
LDACVB0Model(size_t _K = 1, Float _alpha = 0.1, Float _eta = 0.01, size_t _rg = std::random_device{}())
|
336
|
+
: BaseClass(_rg), K(_K), alpha(_alpha), eta(_eta)
|
337
|
+
{
|
338
|
+
alphas = Eigen::Matrix<Float, -1, 1>::Constant(K, alpha);
|
339
|
+
}
|
340
|
+
GETTER(K, size_t, K);
|
341
|
+
GETTER(Alpha, Float, alpha);
|
342
|
+
GETTER(Eta, Float, eta);
|
343
|
+
GETTER(OptimInterval, size_t, optimInterval);
|
344
|
+
|
345
|
+
|
346
|
+
void setOptimInterval(size_t _optimInterval) override
|
347
|
+
{
|
348
|
+
optimInterval = _optimInterval;
|
349
|
+
}
|
350
|
+
|
351
|
+
size_t addDoc(const std::vector<std::string>& words) override
|
352
|
+
{
|
353
|
+
return this->_addDoc(this->_makeDoc(words));
|
354
|
+
}
|
355
|
+
|
356
|
+
std::unique_ptr<DocumentBase> makeDoc(const std::vector<std::string>& words) const override
|
357
|
+
{
|
358
|
+
return make_unique<_DocType>(as_mutable(this)->template _makeDoc<true>(words));
|
359
|
+
}
|
360
|
+
|
361
|
+
void updateDocs()
|
362
|
+
{
|
363
|
+
for (auto& doc : this->docs)
|
364
|
+
{
|
365
|
+
doc.template update<>(nullptr, *static_cast<DerivedClass*>(this));
|
366
|
+
}
|
367
|
+
}
|
368
|
+
|
369
|
+
void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) override
|
370
|
+
{
|
371
|
+
if (initDocs) this->removeStopwords(minWordCnt, minWordDf, removeTopN);
|
372
|
+
static_cast<DerivedClass*>(this)->updateWeakArray();
|
373
|
+
static_cast<DerivedClass*>(this)->initGlobalState(initDocs);
|
374
|
+
|
375
|
+
if (initDocs)
|
376
|
+
{
|
377
|
+
auto generator = static_cast<DerivedClass*>(this)->makeGeneratorForInit(nullptr);
|
378
|
+
for (auto& doc : this->docs)
|
379
|
+
{
|
380
|
+
initializeDocState<false>(doc, nullptr, generator, this->globalState, this->rg);
|
381
|
+
}
|
382
|
+
}
|
383
|
+
else
|
384
|
+
{
|
385
|
+
static_cast<DerivedClass*>(this)->updateDocs();
|
386
|
+
}
|
387
|
+
static_cast<DerivedClass*>(this)->prepareShared();
|
388
|
+
}
|
389
|
+
|
390
|
+
std::vector<size_t> getCountByTopic() const override
|
391
|
+
{
|
392
|
+
return static_cast<const DerivedClass*>(this)->_getTopicsCount();
|
393
|
+
}
|
394
|
+
|
395
|
+
std::vector<Float> getTopicsByDoc(const _DocType& doc) const
|
396
|
+
{
|
397
|
+
std::vector<Float> ret(K);
|
398
|
+
Float sum = doc.getSumWordWeight() + K * alpha;
|
399
|
+
transform(doc.numByTopic.data(), doc.numByTopic.data() + K, ret.begin(), [sum, this](size_t n)
|
400
|
+
{
|
401
|
+
return (n + alpha) / sum;
|
402
|
+
});
|
403
|
+
return ret;
|
404
|
+
}
|
405
|
+
|
406
|
+
// Returns the smoothed word distribution of topic `tid`:
//   (numByTopicWord(tid, v) + eta) / (numByTopic[tid] + V * eta)   for each word v.
std::vector<Float> _getWidsByTopic(Tid tid) const
{
    assert(tid < K);
    const size_t V = this->realV;
    const Float denom = this->globalState.numByTopic[tid] + V * eta;
    auto topicRow = this->globalState.numByTopicWord.row(tid);

    std::vector<Float> ret(V);
    for (size_t v = 0; v < V; ++v) ret[v] = (topicRow[v] + eta) / denom;
    return ret;
}
|
419
|
+
|
420
|
+
template<bool _Together, ParallelScheme _ps, typename _Iter>
|
421
|
+
std::vector<double> _infer(_Iter docFirst, _Iter docLast, size_t maxIter, Float tolerance, size_t numWorkers) const
|
422
|
+
{
|
423
|
+
return {};
|
424
|
+
}
|
425
|
+
};
|
426
|
+
|
427
|
+
template<typename _TopicModel>
|
428
|
+
void DocumentLDACVB0::update(Float * ptr, const _TopicModel & mdl)
|
429
|
+
{
|
430
|
+
numByTopic = Eigen::VectorXf::Zero(mdl.getK());
|
431
|
+
for (size_t i = 0; i < Zs.cols(); ++i)
|
432
|
+
{
|
433
|
+
numByTopic += Zs.col(i);
|
434
|
+
}
|
435
|
+
}
|
436
|
+
|
437
|
+
// Factory for the default CVB0 LDA model. The parameter list matches the
// in-class declaration (the last argument is a size_t that is forwarded to the
// model constructor). The caller owns the returned object.
inline ILDACVB0Model* ILDACVB0Model::create(size_t _K, Float _alpha, Float _eta, size_t _rg)
{
    return new LDACVB0Model<>(_K, _alpha, _eta, _rg);
}
|
441
|
+
|
442
|
+
}
|