tomoto 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +123 -0
- data/ext/tomoto/ext.cpp +245 -0
- data/ext/tomoto/extconf.rb +28 -0
- data/lib/tomoto.rb +12 -0
- data/lib/tomoto/ct.rb +11 -0
- data/lib/tomoto/hdp.rb +11 -0
- data/lib/tomoto/lda.rb +67 -0
- data/lib/tomoto/version.rb +3 -0
- data/vendor/EigenRand/EigenRand/Core.h +1139 -0
- data/vendor/EigenRand/EigenRand/Dists/Basic.h +111 -0
- data/vendor/EigenRand/EigenRand/Dists/Discrete.h +877 -0
- data/vendor/EigenRand/EigenRand/Dists/GammaPoisson.h +108 -0
- data/vendor/EigenRand/EigenRand/Dists/NormalExp.h +626 -0
- data/vendor/EigenRand/EigenRand/EigenRand +19 -0
- data/vendor/EigenRand/EigenRand/Macro.h +24 -0
- data/vendor/EigenRand/EigenRand/MorePacketMath.h +978 -0
- data/vendor/EigenRand/EigenRand/PacketFilter.h +286 -0
- data/vendor/EigenRand/EigenRand/PacketRandomEngine.h +624 -0
- data/vendor/EigenRand/EigenRand/RandUtils.h +413 -0
- data/vendor/EigenRand/EigenRand/doc.h +220 -0
- data/vendor/EigenRand/LICENSE +21 -0
- data/vendor/EigenRand/README.md +288 -0
- data/vendor/eigen/COPYING.BSD +26 -0
- data/vendor/eigen/COPYING.GPL +674 -0
- data/vendor/eigen/COPYING.LGPL +502 -0
- data/vendor/eigen/COPYING.MINPACK +52 -0
- data/vendor/eigen/COPYING.MPL2 +373 -0
- data/vendor/eigen/COPYING.README +18 -0
- data/vendor/eigen/Eigen/CMakeLists.txt +19 -0
- data/vendor/eigen/Eigen/Cholesky +46 -0
- data/vendor/eigen/Eigen/CholmodSupport +48 -0
- data/vendor/eigen/Eigen/Core +537 -0
- data/vendor/eigen/Eigen/Dense +7 -0
- data/vendor/eigen/Eigen/Eigen +2 -0
- data/vendor/eigen/Eigen/Eigenvalues +61 -0
- data/vendor/eigen/Eigen/Geometry +62 -0
- data/vendor/eigen/Eigen/Householder +30 -0
- data/vendor/eigen/Eigen/IterativeLinearSolvers +48 -0
- data/vendor/eigen/Eigen/Jacobi +33 -0
- data/vendor/eigen/Eigen/LU +50 -0
- data/vendor/eigen/Eigen/MetisSupport +35 -0
- data/vendor/eigen/Eigen/OrderingMethods +73 -0
- data/vendor/eigen/Eigen/PaStiXSupport +48 -0
- data/vendor/eigen/Eigen/PardisoSupport +35 -0
- data/vendor/eigen/Eigen/QR +51 -0
- data/vendor/eigen/Eigen/QtAlignedMalloc +40 -0
- data/vendor/eigen/Eigen/SPQRSupport +34 -0
- data/vendor/eigen/Eigen/SVD +51 -0
- data/vendor/eigen/Eigen/Sparse +36 -0
- data/vendor/eigen/Eigen/SparseCholesky +45 -0
- data/vendor/eigen/Eigen/SparseCore +69 -0
- data/vendor/eigen/Eigen/SparseLU +46 -0
- data/vendor/eigen/Eigen/SparseQR +37 -0
- data/vendor/eigen/Eigen/StdDeque +27 -0
- data/vendor/eigen/Eigen/StdList +26 -0
- data/vendor/eigen/Eigen/StdVector +27 -0
- data/vendor/eigen/Eigen/SuperLUSupport +64 -0
- data/vendor/eigen/Eigen/UmfPackSupport +40 -0
- data/vendor/eigen/Eigen/src/Cholesky/LDLT.h +673 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT.h +542 -0
- data/vendor/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h +99 -0
- data/vendor/eigen/Eigen/src/CholmodSupport/CholmodSupport.h +639 -0
- data/vendor/eigen/Eigen/src/Core/Array.h +329 -0
- data/vendor/eigen/Eigen/src/Core/ArrayBase.h +226 -0
- data/vendor/eigen/Eigen/src/Core/ArrayWrapper.h +209 -0
- data/vendor/eigen/Eigen/src/Core/Assign.h +90 -0
- data/vendor/eigen/Eigen/src/Core/AssignEvaluator.h +935 -0
- data/vendor/eigen/Eigen/src/Core/Assign_MKL.h +178 -0
- data/vendor/eigen/Eigen/src/Core/BandMatrix.h +353 -0
- data/vendor/eigen/Eigen/src/Core/Block.h +452 -0
- data/vendor/eigen/Eigen/src/Core/BooleanRedux.h +164 -0
- data/vendor/eigen/Eigen/src/Core/CommaInitializer.h +160 -0
- data/vendor/eigen/Eigen/src/Core/ConditionEstimator.h +175 -0
- data/vendor/eigen/Eigen/src/Core/CoreEvaluators.h +1688 -0
- data/vendor/eigen/Eigen/src/Core/CoreIterators.h +127 -0
- data/vendor/eigen/Eigen/src/Core/CwiseBinaryOp.h +184 -0
- data/vendor/eigen/Eigen/src/Core/CwiseNullaryOp.h +866 -0
- data/vendor/eigen/Eigen/src/Core/CwiseTernaryOp.h +197 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryOp.h +103 -0
- data/vendor/eigen/Eigen/src/Core/CwiseUnaryView.h +128 -0
- data/vendor/eigen/Eigen/src/Core/DenseBase.h +611 -0
- data/vendor/eigen/Eigen/src/Core/DenseCoeffsBase.h +681 -0
- data/vendor/eigen/Eigen/src/Core/DenseStorage.h +570 -0
- data/vendor/eigen/Eigen/src/Core/Diagonal.h +260 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalMatrix.h +343 -0
- data/vendor/eigen/Eigen/src/Core/DiagonalProduct.h +28 -0
- data/vendor/eigen/Eigen/src/Core/Dot.h +318 -0
- data/vendor/eigen/Eigen/src/Core/EigenBase.h +159 -0
- data/vendor/eigen/Eigen/src/Core/ForceAlignedAccess.h +146 -0
- data/vendor/eigen/Eigen/src/Core/Fuzzy.h +155 -0
- data/vendor/eigen/Eigen/src/Core/GeneralProduct.h +455 -0
- data/vendor/eigen/Eigen/src/Core/GenericPacketMath.h +593 -0
- data/vendor/eigen/Eigen/src/Core/GlobalFunctions.h +187 -0
- data/vendor/eigen/Eigen/src/Core/IO.h +225 -0
- data/vendor/eigen/Eigen/src/Core/Inverse.h +118 -0
- data/vendor/eigen/Eigen/src/Core/Map.h +171 -0
- data/vendor/eigen/Eigen/src/Core/MapBase.h +303 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctions.h +1415 -0
- data/vendor/eigen/Eigen/src/Core/MathFunctionsImpl.h +101 -0
- data/vendor/eigen/Eigen/src/Core/Matrix.h +459 -0
- data/vendor/eigen/Eigen/src/Core/MatrixBase.h +529 -0
- data/vendor/eigen/Eigen/src/Core/NestByValue.h +110 -0
- data/vendor/eigen/Eigen/src/Core/NoAlias.h +108 -0
- data/vendor/eigen/Eigen/src/Core/NumTraits.h +248 -0
- data/vendor/eigen/Eigen/src/Core/PermutationMatrix.h +633 -0
- data/vendor/eigen/Eigen/src/Core/PlainObjectBase.h +1035 -0
- data/vendor/eigen/Eigen/src/Core/Product.h +186 -0
- data/vendor/eigen/Eigen/src/Core/ProductEvaluators.h +1112 -0
- data/vendor/eigen/Eigen/src/Core/Random.h +182 -0
- data/vendor/eigen/Eigen/src/Core/Redux.h +505 -0
- data/vendor/eigen/Eigen/src/Core/Ref.h +283 -0
- data/vendor/eigen/Eigen/src/Core/Replicate.h +142 -0
- data/vendor/eigen/Eigen/src/Core/ReturnByValue.h +117 -0
- data/vendor/eigen/Eigen/src/Core/Reverse.h +211 -0
- data/vendor/eigen/Eigen/src/Core/Select.h +162 -0
- data/vendor/eigen/Eigen/src/Core/SelfAdjointView.h +352 -0
- data/vendor/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h +47 -0
- data/vendor/eigen/Eigen/src/Core/Solve.h +188 -0
- data/vendor/eigen/Eigen/src/Core/SolveTriangular.h +235 -0
- data/vendor/eigen/Eigen/src/Core/SolverBase.h +130 -0
- data/vendor/eigen/Eigen/src/Core/StableNorm.h +221 -0
- data/vendor/eigen/Eigen/src/Core/Stride.h +111 -0
- data/vendor/eigen/Eigen/src/Core/Swap.h +67 -0
- data/vendor/eigen/Eigen/src/Core/Transpose.h +403 -0
- data/vendor/eigen/Eigen/src/Core/Transpositions.h +407 -0
- data/vendor/eigen/Eigen/src/Core/TriangularMatrix.h +983 -0
- data/vendor/eigen/Eigen/src/Core/VectorBlock.h +96 -0
- data/vendor/eigen/Eigen/src/Core/VectorwiseOp.h +695 -0
- data/vendor/eigen/Eigen/src/Core/Visitor.h +273 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/Complex.h +451 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/MathFunctions.h +439 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/PacketMath.h +637 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX/TypeCasting.h +51 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/MathFunctions.h +391 -0
- data/vendor/eigen/Eigen/src/Core/arch/AVX512/PacketMath.h +1316 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/Complex.h +430 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/MathFunctions.h +322 -0
- data/vendor/eigen/Eigen/src/Core/arch/AltiVec/PacketMath.h +1061 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Complex.h +103 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/Half.h +674 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMath.h +333 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/PacketMathHalf.h +1124 -0
- data/vendor/eigen/Eigen/src/Core/arch/CUDA/TypeCasting.h +212 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/ConjHelper.h +29 -0
- data/vendor/eigen/Eigen/src/Core/arch/Default/Settings.h +49 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/Complex.h +490 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/MathFunctions.h +91 -0
- data/vendor/eigen/Eigen/src/Core/arch/NEON/PacketMath.h +760 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/Complex.h +471 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/MathFunctions.h +562 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/PacketMath.h +895 -0
- data/vendor/eigen/Eigen/src/Core/arch/SSE/TypeCasting.h +77 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/Complex.h +397 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/MathFunctions.h +137 -0
- data/vendor/eigen/Eigen/src/Core/arch/ZVector/PacketMath.h +945 -0
- data/vendor/eigen/Eigen/src/Core/functors/AssignmentFunctors.h +168 -0
- data/vendor/eigen/Eigen/src/Core/functors/BinaryFunctors.h +475 -0
- data/vendor/eigen/Eigen/src/Core/functors/NullaryFunctors.h +188 -0
- data/vendor/eigen/Eigen/src/Core/functors/StlFunctors.h +136 -0
- data/vendor/eigen/Eigen/src/Core/functors/TernaryFunctors.h +25 -0
- data/vendor/eigen/Eigen/src/Core/functors/UnaryFunctors.h +792 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralBlockPanelKernel.h +2156 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix.h +492 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular.h +311 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h +145 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixMatrix_BLAS.h +122 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector.h +619 -0
- data/vendor/eigen/Eigen/src/Core/products/GeneralMatrixVector_BLAS.h +136 -0
- data/vendor/eigen/Eigen/src/Core/products/Parallelizer.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +521 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixMatrix_BLAS.h +287 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector.h +260 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointMatrixVector_BLAS.h +118 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointProduct.h +133 -0
- data/vendor/eigen/Eigen/src/Core/products/SelfadjointRank2Update.h +93 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix.h +466 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixMatrix_BLAS.h +315 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector.h +350 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularMatrixVector_BLAS.h +255 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix.h +335 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverMatrix_BLAS.h +163 -0
- data/vendor/eigen/Eigen/src/Core/products/TriangularSolverVector.h +145 -0
- data/vendor/eigen/Eigen/src/Core/util/BlasUtil.h +398 -0
- data/vendor/eigen/Eigen/src/Core/util/Constants.h +547 -0
- data/vendor/eigen/Eigen/src/Core/util/DisableStupidWarnings.h +83 -0
- data/vendor/eigen/Eigen/src/Core/util/ForwardDeclarations.h +302 -0
- data/vendor/eigen/Eigen/src/Core/util/MKL_support.h +130 -0
- data/vendor/eigen/Eigen/src/Core/util/Macros.h +1001 -0
- data/vendor/eigen/Eigen/src/Core/util/Memory.h +993 -0
- data/vendor/eigen/Eigen/src/Core/util/Meta.h +534 -0
- data/vendor/eigen/Eigen/src/Core/util/NonMPL2.h +3 -0
- data/vendor/eigen/Eigen/src/Core/util/ReenableStupidWarnings.h +27 -0
- data/vendor/eigen/Eigen/src/Core/util/StaticAssert.h +218 -0
- data/vendor/eigen/Eigen/src/Core/util/XprHelper.h +821 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexEigenSolver.h +346 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur.h +459 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/ComplexSchur_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/EigenSolver.h +622 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedEigenSolver.h +418 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h +226 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/HessenbergDecomposition.h +374 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/MatrixBaseEigenvalues.h +158 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealQZ.h +654 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur.h +546 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/RealSchur_LAPACKE.h +77 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver.h +870 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h +87 -0
- data/vendor/eigen/Eigen/src/Eigenvalues/Tridiagonalization.h +556 -0
- data/vendor/eigen/Eigen/src/Geometry/AlignedBox.h +392 -0
- data/vendor/eigen/Eigen/src/Geometry/AngleAxis.h +247 -0
- data/vendor/eigen/Eigen/src/Geometry/EulerAngles.h +114 -0
- data/vendor/eigen/Eigen/src/Geometry/Homogeneous.h +497 -0
- data/vendor/eigen/Eigen/src/Geometry/Hyperplane.h +282 -0
- data/vendor/eigen/Eigen/src/Geometry/OrthoMethods.h +234 -0
- data/vendor/eigen/Eigen/src/Geometry/ParametrizedLine.h +195 -0
- data/vendor/eigen/Eigen/src/Geometry/Quaternion.h +814 -0
- data/vendor/eigen/Eigen/src/Geometry/Rotation2D.h +199 -0
- data/vendor/eigen/Eigen/src/Geometry/RotationBase.h +206 -0
- data/vendor/eigen/Eigen/src/Geometry/Scaling.h +170 -0
- data/vendor/eigen/Eigen/src/Geometry/Transform.h +1542 -0
- data/vendor/eigen/Eigen/src/Geometry/Translation.h +208 -0
- data/vendor/eigen/Eigen/src/Geometry/Umeyama.h +166 -0
- data/vendor/eigen/Eigen/src/Geometry/arch/Geometry_SSE.h +161 -0
- data/vendor/eigen/Eigen/src/Householder/BlockHouseholder.h +103 -0
- data/vendor/eigen/Eigen/src/Householder/Householder.h +172 -0
- data/vendor/eigen/Eigen/src/Householder/HouseholderSequence.h +470 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BasicPreconditioners.h +226 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/BiCGSTAB.h +228 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/ConjugateGradient.h +246 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteCholesky.h +400 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IncompleteLUT.h +462 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/IterativeSolverBase.h +394 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/LeastSquareConjugateGradient.h +216 -0
- data/vendor/eigen/Eigen/src/IterativeLinearSolvers/SolveWithGuess.h +115 -0
- data/vendor/eigen/Eigen/src/Jacobi/Jacobi.h +462 -0
- data/vendor/eigen/Eigen/src/LU/Determinant.h +101 -0
- data/vendor/eigen/Eigen/src/LU/FullPivLU.h +891 -0
- data/vendor/eigen/Eigen/src/LU/InverseImpl.h +415 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU.h +611 -0
- data/vendor/eigen/Eigen/src/LU/PartialPivLU_LAPACKE.h +83 -0
- data/vendor/eigen/Eigen/src/LU/arch/Inverse_SSE.h +338 -0
- data/vendor/eigen/Eigen/src/MetisSupport/MetisSupport.h +137 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Amd.h +445 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Eigen_Colamd.h +1843 -0
- data/vendor/eigen/Eigen/src/OrderingMethods/Ordering.h +157 -0
- data/vendor/eigen/Eigen/src/PaStiXSupport/PaStiXSupport.h +678 -0
- data/vendor/eigen/Eigen/src/PardisoSupport/PardisoSupport.h +543 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR.h +653 -0
- data/vendor/eigen/Eigen/src/QR/ColPivHouseholderQR_LAPACKE.h +97 -0
- data/vendor/eigen/Eigen/src/QR/CompleteOrthogonalDecomposition.h +562 -0
- data/vendor/eigen/Eigen/src/QR/FullPivHouseholderQR.h +676 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR.h +409 -0
- data/vendor/eigen/Eigen/src/QR/HouseholderQR_LAPACKE.h +68 -0
- data/vendor/eigen/Eigen/src/SPQRSupport/SuiteSparseQRSupport.h +313 -0
- data/vendor/eigen/Eigen/src/SVD/BDCSVD.h +1246 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD.h +804 -0
- data/vendor/eigen/Eigen/src/SVD/JacobiSVD_LAPACKE.h +91 -0
- data/vendor/eigen/Eigen/src/SVD/SVDBase.h +315 -0
- data/vendor/eigen/Eigen/src/SVD/UpperBidiagonalization.h +414 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky.h +689 -0
- data/vendor/eigen/Eigen/src/SparseCholesky/SimplicialCholesky_impl.h +199 -0
- data/vendor/eigen/Eigen/src/SparseCore/AmbiVector.h +377 -0
- data/vendor/eigen/Eigen/src/SparseCore/CompressedStorage.h +258 -0
- data/vendor/eigen/Eigen/src/SparseCore/ConservativeSparseSparseProduct.h +352 -0
- data/vendor/eigen/Eigen/src/SparseCore/MappedSparseMatrix.h +67 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseAssign.h +216 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseBlock.h +603 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseColEtree.h +206 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCompressedBase.h +341 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseBinaryOp.h +726 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseCwiseUnaryOp.h +148 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDenseProduct.h +320 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDiagonalProduct.h +138 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseDot.h +98 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseFuzzy.h +29 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMap.h +305 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrix.h +1403 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseMatrixBase.h +405 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparsePermutation.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseProduct.h +169 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRedux.h +49 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseRef.h +397 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSelfAdjointView.h +656 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSolverBase.h +124 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseSparseProductWithPruning.h +198 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTranspose.h +92 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseTriangularView.h +189 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseUtil.h +178 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseVector.h +478 -0
- data/vendor/eigen/Eigen/src/SparseCore/SparseView.h +253 -0
- data/vendor/eigen/Eigen/src/SparseCore/TriangularSolver.h +315 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU.h +773 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLUImpl.h +66 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Memory.h +226 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Structs.h +110 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_SupernodalMatrix.h +301 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_Utils.h +80 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_bmod.h +181 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_column_dfs.h +179 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_copy_to_ucol.h +107 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_gemm_kernel.h +280 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_heap_relax_snode.h +126 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_kernel_bmod.h +130 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_bmod.h +223 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_panel_dfs.h +258 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pivotL.h +137 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_pruneL.h +136 -0
- data/vendor/eigen/Eigen/src/SparseLU/SparseLU_relax_snode.h +83 -0
- data/vendor/eigen/Eigen/src/SparseQR/SparseQR.h +745 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdDeque.h +126 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdList.h +106 -0
- data/vendor/eigen/Eigen/src/StlSupport/StdVector.h +131 -0
- data/vendor/eigen/Eigen/src/StlSupport/details.h +84 -0
- data/vendor/eigen/Eigen/src/SuperLUSupport/SuperLUSupport.h +1027 -0
- data/vendor/eigen/Eigen/src/UmfPackSupport/UmfPackSupport.h +506 -0
- data/vendor/eigen/Eigen/src/misc/Image.h +82 -0
- data/vendor/eigen/Eigen/src/misc/Kernel.h +79 -0
- data/vendor/eigen/Eigen/src/misc/RealSvd2x2.h +55 -0
- data/vendor/eigen/Eigen/src/misc/blas.h +440 -0
- data/vendor/eigen/Eigen/src/misc/lapack.h +152 -0
- data/vendor/eigen/Eigen/src/misc/lapacke.h +16291 -0
- data/vendor/eigen/Eigen/src/misc/lapacke_mangling.h +17 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseBinaryOps.h +332 -0
- data/vendor/eigen/Eigen/src/plugins/ArrayCwiseUnaryOps.h +552 -0
- data/vendor/eigen/Eigen/src/plugins/BlockMethods.h +1058 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseBinaryOps.h +115 -0
- data/vendor/eigen/Eigen/src/plugins/CommonCwiseUnaryOps.h +163 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseBinaryOps.h +152 -0
- data/vendor/eigen/Eigen/src/plugins/MatrixCwiseUnaryOps.h +85 -0
- data/vendor/eigen/README.md +3 -0
- data/vendor/eigen/bench/README.txt +55 -0
- data/vendor/eigen/bench/btl/COPYING +340 -0
- data/vendor/eigen/bench/btl/README +154 -0
- data/vendor/eigen/bench/tensors/README +21 -0
- data/vendor/eigen/blas/README.txt +6 -0
- data/vendor/eigen/demos/mandelbrot/README +10 -0
- data/vendor/eigen/demos/mix_eigen_and_c/README +9 -0
- data/vendor/eigen/demos/opengl/README +13 -0
- data/vendor/eigen/unsupported/Eigen/CXX11/src/Tensor/README.md +1760 -0
- data/vendor/eigen/unsupported/README.txt +50 -0
- data/vendor/tomotopy/LICENSE +21 -0
- data/vendor/tomotopy/README.kr.rst +375 -0
- data/vendor/tomotopy/README.rst +382 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.cpp +362 -0
- data/vendor/tomotopy/src/Labeling/FoRelevance.h +88 -0
- data/vendor/tomotopy/src/Labeling/Labeler.h +50 -0
- data/vendor/tomotopy/src/TopicModel/CT.h +37 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/CTModel.hpp +293 -0
- data/vendor/tomotopy/src/TopicModel/DMR.h +51 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/DMRModel.hpp +374 -0
- data/vendor/tomotopy/src/TopicModel/DT.h +65 -0
- data/vendor/tomotopy/src/TopicModel/DTM.h +22 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.cpp +15 -0
- data/vendor/tomotopy/src/TopicModel/DTModel.hpp +572 -0
- data/vendor/tomotopy/src/TopicModel/GDMR.h +37 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.cpp +14 -0
- data/vendor/tomotopy/src/TopicModel/GDMRModel.hpp +485 -0
- data/vendor/tomotopy/src/TopicModel/HDP.h +74 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HDPModel.hpp +592 -0
- data/vendor/tomotopy/src/TopicModel/HLDA.h +40 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/HLDAModel.hpp +681 -0
- data/vendor/tomotopy/src/TopicModel/HPA.h +27 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.cpp +21 -0
- data/vendor/tomotopy/src/TopicModel/HPAModel.hpp +588 -0
- data/vendor/tomotopy/src/TopicModel/LDA.h +144 -0
- data/vendor/tomotopy/src/TopicModel/LDACVB0Model.hpp +442 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LDAModel.hpp +1058 -0
- data/vendor/tomotopy/src/TopicModel/LLDA.h +45 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/LLDAModel.hpp +203 -0
- data/vendor/tomotopy/src/TopicModel/MGLDA.h +63 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/MGLDAModel.hpp +558 -0
- data/vendor/tomotopy/src/TopicModel/PA.h +43 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PAModel.hpp +467 -0
- data/vendor/tomotopy/src/TopicModel/PLDA.h +17 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.cpp +13 -0
- data/vendor/tomotopy/src/TopicModel/PLDAModel.hpp +214 -0
- data/vendor/tomotopy/src/TopicModel/SLDA.h +54 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.cpp +17 -0
- data/vendor/tomotopy/src/TopicModel/SLDAModel.hpp +456 -0
- data/vendor/tomotopy/src/TopicModel/TopicModel.hpp +692 -0
- data/vendor/tomotopy/src/Utils/AliasMethod.hpp +169 -0
- data/vendor/tomotopy/src/Utils/Dictionary.h +80 -0
- data/vendor/tomotopy/src/Utils/EigenAddonOps.hpp +181 -0
- data/vendor/tomotopy/src/Utils/LBFGS.h +202 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBacktracking.h +120 -0
- data/vendor/tomotopy/src/Utils/LBFGS/LineSearchBracketing.h +122 -0
- data/vendor/tomotopy/src/Utils/LBFGS/Param.h +213 -0
- data/vendor/tomotopy/src/Utils/LUT.hpp +82 -0
- data/vendor/tomotopy/src/Utils/MultiNormalDistribution.hpp +69 -0
- data/vendor/tomotopy/src/Utils/PolyaGamma.hpp +200 -0
- data/vendor/tomotopy/src/Utils/PolyaGammaHybrid.hpp +672 -0
- data/vendor/tomotopy/src/Utils/ThreadPool.hpp +150 -0
- data/vendor/tomotopy/src/Utils/Trie.hpp +220 -0
- data/vendor/tomotopy/src/Utils/TruncMultiNormal.hpp +94 -0
- data/vendor/tomotopy/src/Utils/Utils.hpp +337 -0
- data/vendor/tomotopy/src/Utils/avx_gamma.h +46 -0
- data/vendor/tomotopy/src/Utils/avx_mathfun.h +736 -0
- data/vendor/tomotopy/src/Utils/exception.h +28 -0
- data/vendor/tomotopy/src/Utils/math.h +281 -0
- data/vendor/tomotopy/src/Utils/rtnorm.hpp +2690 -0
- data/vendor/tomotopy/src/Utils/sample.hpp +192 -0
- data/vendor/tomotopy/src/Utils/serializer.hpp +695 -0
- data/vendor/tomotopy/src/Utils/slp.hpp +131 -0
- data/vendor/tomotopy/src/Utils/sse_gamma.h +48 -0
- data/vendor/tomotopy/src/Utils/sse_mathfun.h +710 -0
- data/vendor/tomotopy/src/Utils/text.hpp +49 -0
- data/vendor/tomotopy/src/Utils/tvector.hpp +543 -0
- metadata +531 -0
|
@@ -0,0 +1,692 @@
|
|
|
1
|
+
#pragma once
#include <numeric>
#include <type_traits>
#include <unordered_set>
#include "../Utils/Utils.hpp"
#include "../Utils/Dictionary.h"
#include "../Utils/tvector.hpp"
#include "../Utils/ThreadPool.hpp"
#include "../Utils/serializer.hpp"
#include "../Utils/exception.h"
#include <EigenRand/EigenRand>
|
|
11
|
+
|
|
12
|
+
namespace tomoto
|
|
13
|
+
{
|
|
14
|
+
// Alias for EigenRand's P8_mt19937_64 engine instantiated with uint32_t.
// NOTE(review): presumably the packet/vectorized Mersenne Twister variant - confirm against EigenRand docs.
using RandGen = Eigen::Rand::P8_mt19937_64<uint32_t>;
|
|
15
|
+
// Alias for EigenRand's UniversalRandomEngine wrapping std::mt19937_64, instantiated with uint32_t.
using ScalarRandGen = Eigen::Rand::UniversalRandomEngine<uint32_t, std::mt19937_64>;
|
|
16
|
+
|
|
17
|
+
class DocumentBase
|
|
18
|
+
{
|
|
19
|
+
public:
|
|
20
|
+
Float weight = 1;
|
|
21
|
+
tvector<Vid> words; // word id of each word
|
|
22
|
+
std::vector<uint32_t> wOrder; // original word order (optional)
|
|
23
|
+
|
|
24
|
+
std::string docUid;
|
|
25
|
+
std::string rawStr;
|
|
26
|
+
std::vector<uint32_t> origWordPos;
|
|
27
|
+
std::vector<uint16_t> origWordLen;
|
|
28
|
+
DocumentBase(Float _weight = 1) : weight(_weight) {}
|
|
29
|
+
virtual ~DocumentBase() {}
|
|
30
|
+
|
|
31
|
+
DEFINE_SERIALIZER_WITH_VERSION(0, serializer::to_key("Docu"), weight, words, wOrder);
|
|
32
|
+
DEFINE_TAGGED_SERIALIZER_WITH_VERSION(1, 0x00010001, weight, words, wOrder,
|
|
33
|
+
rawStr, origWordPos, origWordLen,
|
|
34
|
+
docUid
|
|
35
|
+
);
|
|
36
|
+
};
|
|
37
|
+
|
|
38
|
+
enum class ParallelScheme { default_, none, copy_merge, partition, size };

// Returns the textual name of a parallelization scheme.
// Every value without a dedicated name (ParallelScheme::size included)
// maps to "unknown".
inline const char* toString(ParallelScheme ps)
{
	if (ps == ParallelScheme::default_) return "default";
	if (ps == ParallelScheme::none) return "none";
	if (ps == ParallelScheme::copy_merge) return "copy_merge";
	if (ps == ParallelScheme::partition) return "partition";
	return "unknown";
}
|
|
51
|
+
|
|
52
|
+
// Wraps a "give me the next token" callback so that the tokens can be
// consumed with a range-based for loop.
//
// The callback returns a Token tuple; its last (bool) element signals that
// the stream is exhausted. The iterator exposes only the first three
// elements of each token.
class RawDocTokenizer
{
public:
	// (token text, uint32_t, uint32_t, end-of-stream flag).
	// NOTE(review): the two integers are presumably the token's position and length in the raw text - confirm with the callers.
	using Token = std::tuple<std::string, uint32_t, uint32_t, bool>;
	using Factory = std::function<RawDocTokenizer(const std::string&)>;
private:
	std::function<Token()> fnNext;
public:
	// Single-pass iterator over the tokens produced by the callback.
	// It stores a raw pointer to the tokenizer, so the tokenizer must outlive it.
	class Iterator
	{
		RawDocTokenizer* p = nullptr;
		bool end = true;
		std::tuple<std::string, uint32_t, uint32_t> value;
	public:
		// Constructs the end-of-stream iterator.
		Iterator()
		{
		}

		// Constructs an iterator over `_p` and immediately fetches the first token.
		Iterator(RawDocTokenizer* _p)
			: p{ _p }, end{ false }
		{
			operator++();
		}

		std::tuple<std::string, uint32_t, uint32_t>& operator*()
		{
			return value;
		}

		// Fetches the next token from the callback. Once the end has been
		// reached (or when there is no tokenizer) this is a no-op, so an end
		// iterator never dereferences a null pointer nor re-invokes an
		// exhausted callback.
		Iterator& operator++()
		{
			if (end || !p)
			{
				end = true;
				return *this;
			}
			auto v = p->fnNext();
			if (std::get<3>(v))
			{
				end = true;
			}
			else
			{
				// `v` is a local that is discarded afterwards: move the text instead of copying it.
				value = std::make_tuple(std::move(std::get<0>(v)), std::get<1>(v), std::get<2>(v));
			}
			return *this;
		}

		// Only end-of-stream iterators compare equal; this is all a
		// range-based for loop needs.
		bool operator==(const Iterator& o) const
		{
			return o.end && end;
		}

		bool operator!=(const Iterator& o) const
		{
			return !operator==(o);
		}
	};

	// Builds a tokenizer from any callable returning a Token.
	// The constraint keeps this forwarding constructor from being selected
	// when copying a non-const RawDocTokenizer lvalue, where it would
	// otherwise beat the copy constructor and fail to compile.
	template<typename _Fn,
		typename = typename std::enable_if<
			!std::is_same<typename std::decay<_Fn>::type, RawDocTokenizer>::value
		>::type
	>
	RawDocTokenizer(_Fn&& fn) : fnNext{ std::forward<_Fn>(fn) }
	{
	}

	// Starts the iteration; this already consumes the first token from the callback.
	Iterator begin()
	{
		return Iterator{ this };
	}

	Iterator end()
	{
		return Iterator{};
	}
};
|
|
121
|
+
|
|
122
|
+
class ITopicModel
|
|
123
|
+
{
|
|
124
|
+
public:
|
|
125
|
+
virtual void saveModel(std::ostream& writer, bool fullModel,
|
|
126
|
+
const std::vector<uint8_t>* extra_data = nullptr) const = 0;
|
|
127
|
+
virtual void loadModel(std::istream& reader,
|
|
128
|
+
std::vector<uint8_t>* extra_data = nullptr) = 0;
|
|
129
|
+
virtual const DocumentBase* getDoc(size_t docId) const = 0;
|
|
130
|
+
|
|
131
|
+
virtual void updateVocab(const std::vector<std::string>& words) = 0;
|
|
132
|
+
|
|
133
|
+
virtual double getLLPerWord() const = 0;
|
|
134
|
+
virtual double getPerplexity() const = 0;
|
|
135
|
+
virtual uint64_t getV() const = 0;
|
|
136
|
+
virtual uint64_t getN() const = 0;
|
|
137
|
+
virtual size_t getNumDocs() const = 0;
|
|
138
|
+
virtual const Dictionary& getVocabDict() const = 0;
|
|
139
|
+
virtual const std::vector<uint64_t>& getVocabCf() const = 0;
|
|
140
|
+
virtual const std::vector<uint64_t>& getVocabDf() const = 0;
|
|
141
|
+
|
|
142
|
+
virtual int train(size_t iteration, size_t numWorkers, ParallelScheme ps = ParallelScheme::default_) = 0;
|
|
143
|
+
virtual size_t getGlobalStep() const = 0;
|
|
144
|
+
virtual void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) = 0;
|
|
145
|
+
|
|
146
|
+
virtual size_t getK() const = 0;
|
|
147
|
+
virtual std::vector<Float> getWidsByTopic(size_t tid) const = 0;
|
|
148
|
+
virtual std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const = 0;
|
|
149
|
+
|
|
150
|
+
virtual std::vector<std::pair<std::string, Float>> getWordsByDocSorted(const DocumentBase* doc, size_t topN) const = 0;
|
|
151
|
+
|
|
152
|
+
virtual std::vector<Float> getTopicsByDoc(const DocumentBase* doc) const = 0;
|
|
153
|
+
virtual std::vector<std::pair<Tid, Float>> getTopicsByDocSorted(const DocumentBase* doc, size_t topN) const = 0;
|
|
154
|
+
virtual std::vector<double> infer(const std::vector<DocumentBase*>& docs, size_t maxIter, Float tolerance, size_t numWorkers, ParallelScheme ps, bool together) const = 0;
|
|
155
|
+
virtual ~ITopicModel() {}
|
|
156
|
+
};
|
|
157
|
+
|
|
158
|
+
// Pairs every element of `vec` with its index (converted to _TyKey) and
// returns the `topN` pairs holding the largest values, ordered by
// descending value. When `topN` is not smaller than `vec.size()`, all
// elements are returned (fully sorted). The relative order of equal values
// is unspecified.
template<class _TyKey, class _TyValue>
static std::vector<std::pair<_TyKey, _TyValue>> extractTopN(const std::vector<_TyValue>& vec, size_t topN)
{
	typedef std::pair<_TyKey, _TyValue> pair_t;
	std::vector<pair_t> ret;
	ret.reserve(vec.size()); // final size is known: avoid repeated reallocation
	_TyKey k = 0;
	for (auto& t : vec)
	{
		ret.emplace_back(k++, t);
	}
	// Only the first `kept` entries survive, so only they need to be ordered.
	const size_t kept = topN < ret.size() ? topN : ret.size();
	std::partial_sort(ret.begin(), ret.begin() + kept, ret.end(), [](const pair_t& a, const pair_t& b)
	{
		return a.second > b.second;
	});
	ret.erase(ret.begin() + kept, ret.end());
	return ret;
}
|
|
175
|
+
|
|
176
|
+
namespace flags
{
	// Bit flags; each enumerator occupies its own bit so that they can be OR-ed together.
	// NOTE(review): end_flag_of_TopicModel is presumably the first bit left free for further flags - confirm with its users.
	enum
	{
		continuous_doc_data = 0x1,
		shared_state = 0x2,
		partitioned_multisampling = 0x4,
		end_flag_of_TopicModel = 0x8,
	};
}
|
|
186
|
+
|
|
187
|
+
template<typename _RandGen, size_t _Flags, typename _Interface, typename _Derived,
|
|
188
|
+
typename _DocType, typename _ModelState
|
|
189
|
+
>
|
|
190
|
+
class TopicModel : public _Interface
|
|
191
|
+
{
|
|
192
|
+
friend class Document;
|
|
193
|
+
public:
|
|
194
|
+
using DocType = _DocType;
|
|
195
|
+
protected:
|
|
196
|
+
_RandGen rg;
|
|
197
|
+
std::vector<_RandGen> localRG;
|
|
198
|
+
std::vector<Vid> words;
|
|
199
|
+
std::vector<uint32_t> wOffsetByDoc;
|
|
200
|
+
|
|
201
|
+
std::vector<DocType> docs;
|
|
202
|
+
std::vector<uint64_t> vocabCf;
|
|
203
|
+
std::vector<uint64_t> vocabDf;
|
|
204
|
+
size_t globalStep = 0;
|
|
205
|
+
_ModelState globalState, tState;
|
|
206
|
+
Dictionary dict;
|
|
207
|
+
uint64_t realV = 0; // vocab size after removing stopwords
|
|
208
|
+
uint64_t realN = 0; // total word size after removing stopwords
|
|
209
|
+
size_t maxThreads[(size_t)ParallelScheme::size] = { 0, };
|
|
210
|
+
size_t minWordCf = 0, minWordDf = 0, removeTopN = 0;
|
|
211
|
+
|
|
212
|
+
std::unique_ptr<ThreadPool> cachedPool;
|
|
213
|
+
|
|
214
|
+
void _saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const
|
|
215
|
+
{
|
|
216
|
+
serializer::writeMany(writer,
|
|
217
|
+
serializer::to_keyz(static_cast<const _Derived*>(this)->TMID),
|
|
218
|
+
serializer::to_keyz(static_cast<const _Derived*>(this)->TWID));
|
|
219
|
+
serializer::writeTaggedMany(writer, 0x00010001,
|
|
220
|
+
serializer::to_keyz("dict"), dict,
|
|
221
|
+
serializer::to_keyz("vocabCf"), vocabCf,
|
|
222
|
+
serializer::to_keyz("vocabDf"), vocabDf,
|
|
223
|
+
serializer::to_keyz("realV"), realV,
|
|
224
|
+
serializer::to_keyz("globalStep"), globalStep,
|
|
225
|
+
serializer::to_keyz("extra"), extra_data ? *extra_data : std::vector<uint8_t>(0));
|
|
226
|
+
serializer::writeMany(writer, *static_cast<const _Derived*>(this));
|
|
227
|
+
globalState.serializerWrite(writer);
|
|
228
|
+
if (fullModel)
|
|
229
|
+
{
|
|
230
|
+
serializer::writeMany(writer, docs);
|
|
231
|
+
}
|
|
232
|
+
else
|
|
233
|
+
{
|
|
234
|
+
serializer::writeMany(writer, std::vector<size_t>{});
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
void _loadModel(std::istream& reader, std::vector<uint8_t>* extra_data)
|
|
239
|
+
{
|
|
240
|
+
auto start_pos = reader.tellg();
|
|
241
|
+
try
|
|
242
|
+
{
|
|
243
|
+
std::vector<uint8_t> extra;
|
|
244
|
+
serializer::readMany(reader,
|
|
245
|
+
serializer::to_keyz(static_cast<_Derived*>(this)->TMID),
|
|
246
|
+
serializer::to_keyz(static_cast<_Derived*>(this)->TWID));
|
|
247
|
+
serializer::readTaggedMany(reader, 0x00010001,
|
|
248
|
+
serializer::to_keyz("dict"), dict,
|
|
249
|
+
serializer::to_keyz("vocabCf"), vocabCf,
|
|
250
|
+
serializer::to_keyz("vocabDf"), vocabDf,
|
|
251
|
+
serializer::to_keyz("realV"), realV,
|
|
252
|
+
serializer::to_keyz("globalStep"), globalStep,
|
|
253
|
+
serializer::to_keyz("extra"), extra);
|
|
254
|
+
if (extra_data) *extra_data = std::move(extra);
|
|
255
|
+
}
|
|
256
|
+
catch (const std::ios_base::failure&)
|
|
257
|
+
{
|
|
258
|
+
reader.seekg(start_pos);
|
|
259
|
+
serializer::readMany(reader,
|
|
260
|
+
serializer::to_key(static_cast<_Derived*>(this)->TMID),
|
|
261
|
+
serializer::to_key(static_cast<_Derived*>(this)->TWID),
|
|
262
|
+
dict, vocabCf, realV);
|
|
263
|
+
}
|
|
264
|
+
serializer::readMany(reader, *static_cast<_Derived*>(this));
|
|
265
|
+
globalState.serializerRead(reader);
|
|
266
|
+
serializer::readMany(reader, docs);
|
|
267
|
+
realN = countRealN();
|
|
268
|
+
}
|
|
269
|
+
|
|
270
|
+
template<typename _DocTy>
|
|
271
|
+
typename std::enable_if<std::is_same<DocType,
|
|
272
|
+
typename std::remove_reference<typename std::remove_cv<_DocTy>::type>::type
|
|
273
|
+
>::value, size_t>::type _addDoc(_DocTy&& doc)
|
|
274
|
+
{
|
|
275
|
+
if (doc.words.empty()) return -1;
|
|
276
|
+
size_t maxWid = *std::max_element(doc.words.begin(), doc.words.end());
|
|
277
|
+
if (vocabCf.size() <= maxWid)
|
|
278
|
+
{
|
|
279
|
+
vocabCf.resize(maxWid + 1);
|
|
280
|
+
vocabDf.resize(maxWid + 1);
|
|
281
|
+
}
|
|
282
|
+
for (auto w : doc.words) ++vocabCf[w];
|
|
283
|
+
std::unordered_set<Vid> uniq{ doc.words.begin(), doc.words.end() };
|
|
284
|
+
for (auto w : uniq) ++vocabDf[w];
|
|
285
|
+
docs.emplace_back(std::forward<_DocTy>(doc));
|
|
286
|
+
return docs.size() - 1;
|
|
287
|
+
}
|
|
288
|
+
|
|
289
|
+
template<bool _const = false>
|
|
290
|
+
DocType _makeDoc(const std::vector<std::string>& words, Float weight = 1)
|
|
291
|
+
{
|
|
292
|
+
DocType doc{ weight };
|
|
293
|
+
for (auto& w : words)
|
|
294
|
+
{
|
|
295
|
+
Vid id;
|
|
296
|
+
if (_const)
|
|
297
|
+
{
|
|
298
|
+
id = dict.toWid(w);
|
|
299
|
+
if (id == (Vid)-1) continue;
|
|
300
|
+
}
|
|
301
|
+
else
|
|
302
|
+
{
|
|
303
|
+
id = dict.add(w);
|
|
304
|
+
}
|
|
305
|
+
doc.words.emplace_back(id);
|
|
306
|
+
}
|
|
307
|
+
return doc;
|
|
308
|
+
}
|
|
309
|
+
|
|
310
|
+
/*
 * Builds a document from already-tokenized input: the raw text, the word
 * ids and, copied as given, the `pos` / `len` vectors stored as
 * origWordPos / origWordLen. The dictionary is not consulted or modified.
 */
DocType _makeRawDoc(const std::string& rawStr, const std::vector<Vid>& words,
	const std::vector<uint32_t>& pos, const std::vector<uint16_t>& len, Float weight = 1) const
{
	DocType doc{ weight };
	doc.rawStr = rawStr;
	for (size_t i = 0; i < words.size(); ++i)
	{
		doc.words.emplace_back(words[i]);
	}
	doc.origWordPos = pos;
	doc.origWordLen = len;
	return doc;
}
|
|
320
|
+
|
|
321
|
+
template<bool _const, typename _FnTokenizer>
|
|
322
|
+
DocType _makeRawDoc(const std::string& rawStr, _FnTokenizer&& tokenizer, Float weight = 1)
|
|
323
|
+
{
|
|
324
|
+
DocType doc{ weight };
|
|
325
|
+
doc.rawStr = rawStr;
|
|
326
|
+
for (auto& p : tokenizer(doc.rawStr))
|
|
327
|
+
{
|
|
328
|
+
Vid wid;
|
|
329
|
+
if (_const)
|
|
330
|
+
{
|
|
331
|
+
wid = dict.toWid(std::get<0>(p));
|
|
332
|
+
if (wid == (Vid)-1) continue;
|
|
333
|
+
}
|
|
334
|
+
else
|
|
335
|
+
{
|
|
336
|
+
wid = dict.add(std::get<0>(p));
|
|
337
|
+
}
|
|
338
|
+
auto pos = std::get<1>(p);
|
|
339
|
+
auto len = std::get<2>(p);
|
|
340
|
+
doc.words.emplace_back(wid);
|
|
341
|
+
doc.origWordPos.emplace_back(pos);
|
|
342
|
+
doc.origWordLen.emplace_back(len);
|
|
343
|
+
}
|
|
344
|
+
return doc;
|
|
345
|
+
}
|
|
346
|
+
|
|
347
|
+
// Returns the stored document at index `docId`; the index is not range-checked.
const DocType& _getDoc(size_t docId) const
{
	const DocType& found = docs[docId];
	return found;
}
|
|
351
|
+
|
|
352
|
+
void updateWeakArray()
|
|
353
|
+
{
|
|
354
|
+
wOffsetByDoc.emplace_back(0);
|
|
355
|
+
for (auto& doc : docs)
|
|
356
|
+
{
|
|
357
|
+
wOffsetByDoc.emplace_back(wOffsetByDoc.back() + doc.words.size());
|
|
358
|
+
}
|
|
359
|
+
auto tx = [](_DocType& doc) { return &doc.words; };
|
|
360
|
+
tvector<Vid>::trade(words,
|
|
361
|
+
makeTransformIter(docs.begin(), tx),
|
|
362
|
+
makeTransformIter(docs.end(), tx));
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
size_t countRealN() const
|
|
366
|
+
{
|
|
367
|
+
size_t n = 0;
|
|
368
|
+
for (auto& doc : docs)
|
|
369
|
+
{
|
|
370
|
+
for (auto& w : doc.words)
|
|
371
|
+
{
|
|
372
|
+
if (w < realV) ++n;
|
|
373
|
+
}
|
|
374
|
+
}
|
|
375
|
+
return n;
|
|
376
|
+
}
|
|
377
|
+
|
|
378
|
+
void removeStopwords(size_t minWordCnt, size_t minWordDf, size_t removeTopN)
|
|
379
|
+
{
|
|
380
|
+
if (minWordCnt <= 1 && minWordDf <= 1 && removeTopN == 0) realV = dict.size();
|
|
381
|
+
this->minWordCf = minWordCnt;
|
|
382
|
+
this->minWordDf = minWordDf;
|
|
383
|
+
this->removeTopN = removeTopN;
|
|
384
|
+
std::vector<std::pair<size_t, size_t>> vocabCfDf;
|
|
385
|
+
for (size_t i = 0; i < vocabCf.size(); ++i)
|
|
386
|
+
{
|
|
387
|
+
vocabCfDf.emplace_back(vocabCf[i], vocabDf[i]);
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
std::vector<Vid> order;
|
|
391
|
+
sortAndWriteOrder(vocabCfDf, order, removeTopN, [&](const std::pair<size_t, size_t>& a, const std::pair<size_t, size_t>& b)
|
|
392
|
+
{
|
|
393
|
+
if (a.first < minWordCnt || a.second < minWordDf)
|
|
394
|
+
{
|
|
395
|
+
if (b.first < minWordCnt || b.second < minWordDf)
|
|
396
|
+
{
|
|
397
|
+
return a > b;
|
|
398
|
+
}
|
|
399
|
+
return false;
|
|
400
|
+
}
|
|
401
|
+
if (b.first < minWordCnt || b.second < minWordDf)
|
|
402
|
+
{
|
|
403
|
+
return true;
|
|
404
|
+
}
|
|
405
|
+
return a > b;
|
|
406
|
+
});
|
|
407
|
+
realV = std::find_if(vocabCfDf.begin(), vocabCfDf.end() - std::min(removeTopN, vocabCfDf.size()), [&](const std::pair<size_t, size_t>& a)
|
|
408
|
+
{
|
|
409
|
+
return a.first < minWordCnt || a.second < minWordDf;
|
|
410
|
+
}) - vocabCfDf.begin();
|
|
411
|
+
|
|
412
|
+
for (size_t i = 0; i < vocabCfDf.size(); ++i)
|
|
413
|
+
{
|
|
414
|
+
vocabCf[i] = vocabCfDf[i].first;
|
|
415
|
+
vocabDf[i] = vocabCfDf[i].second;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
dict.reorder(order);
|
|
419
|
+
realN = 0;
|
|
420
|
+
for (auto& doc : docs)
|
|
421
|
+
{
|
|
422
|
+
for (auto& w : doc.words)
|
|
423
|
+
{
|
|
424
|
+
w = order[w];
|
|
425
|
+
if (w < realV) ++realN;
|
|
426
|
+
}
|
|
427
|
+
}
|
|
428
|
+
}
|
|
429
|
+
|
|
430
|
+
/*
 * Hook invoked by train() when a training step raises TrainingError.
 * This base version performs no recovery: it ignores its other arguments
 * and throws a copy of `e`, so it never returns a value.
 * train() stops and returns the result when it is negative, and retries the
 * step otherwise.
 */
int restoreFromTrainingError(const exception::TrainingError& e, ThreadPool& pool, _ModelState* localData, _RandGen* rgs)
{
	(void)pool;
	(void)localData;
	(void)rgs;
	throw e;
}
|
|
434
|
+
|
|
435
|
+
public:
|
|
436
|
+
// Constructs the model, passing `_rg` to the random generator's constructor.
TopicModel(size_t _rg)
	: rg(_rg)
{
}
|
|
439
|
+
|
|
440
|
+
size_t getNumDocs() const override
|
|
441
|
+
{
|
|
442
|
+
return docs.size();
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
uint64_t getN() const override
|
|
446
|
+
{
|
|
447
|
+
return realN;
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
uint64_t getV() const override
|
|
451
|
+
{
|
|
452
|
+
return realV;
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
void updateVocab(const std::vector<std::string>& words) override
|
|
456
|
+
{
|
|
457
|
+
if(dict.size()) THROW_ERROR_WITH_INFO(exception::InvalidArgument, "updateVocab after addDoc");
|
|
458
|
+
for(auto& w : words) dict.add(w);
|
|
459
|
+
}
|
|
460
|
+
|
|
461
|
+
void prepare(bool initDocs = true, size_t minWordCnt = 0, size_t minWordDf = 0, size_t removeTopN = 0) override
|
|
462
|
+
{
|
|
463
|
+
maxThreads[(size_t)ParallelScheme::default_] = -1;
|
|
464
|
+
maxThreads[(size_t)ParallelScheme::none] = -1;
|
|
465
|
+
maxThreads[(size_t)ParallelScheme::copy_merge] = static_cast<_Derived*>(this)->template estimateMaxThreads<ParallelScheme::copy_merge>();
|
|
466
|
+
maxThreads[(size_t)ParallelScheme::partition] = static_cast<_Derived*>(this)->template estimateMaxThreads<ParallelScheme::partition>();
|
|
467
|
+
}
|
|
468
|
+
|
|
469
|
+
/*
 * Resolves the requested scheme against the model's compile-time flags.
 *
 * default_   -> partition if partitioned_multisampling is set, else none
 *               if shared_state is set, else copy_merge.
 * copy_merge -> rejected (exception::InvalidArgument) for shared_state models.
 * partition  -> rejected (exception::InvalidArgument) unless
 *               partitioned_multisampling is set.
 * Any other value is returned unchanged.
 */
static ParallelScheme getRealScheme(ParallelScheme ps)
{
	const bool hasSharedState = (_Flags & flags::shared_state) != 0;
	const bool hasPartition = (_Flags & flags::partitioned_multisampling) != 0;

	if (ps == ParallelScheme::default_)
	{
		if (hasPartition) return ParallelScheme::partition;
		return hasSharedState ? ParallelScheme::none : ParallelScheme::copy_merge;
	}

	if (ps == ParallelScheme::copy_merge && hasSharedState)
	{
		THROW_ERROR_WITH_INFO(exception::InvalidArgument,
			std::string{ "This model doesn't provide ParallelScheme::" } + toString(ps));
	}

	if (ps == ParallelScheme::partition && !hasPartition)
	{
		THROW_ERROR_WITH_INFO(exception::InvalidArgument,
			std::string{ "This model doesn't provide ParallelScheme::" } + toString(ps));
	}

	return ps;
}
|
|
488
|
+
|
|
489
|
+
int train(size_t iteration, size_t numWorkers, ParallelScheme ps) override
|
|
490
|
+
{
|
|
491
|
+
if (!numWorkers) numWorkers = std::thread::hardware_concurrency();
|
|
492
|
+
ps = getRealScheme(ps);
|
|
493
|
+
numWorkers = std::min(numWorkers, maxThreads[(size_t)ps]);
|
|
494
|
+
if (numWorkers == 1 || (_Flags & flags::shared_state)) ps = ParallelScheme::none;
|
|
495
|
+
if (!cachedPool || cachedPool->getNumWorkers() != numWorkers)
|
|
496
|
+
{
|
|
497
|
+
cachedPool = make_unique<ThreadPool>(numWorkers);
|
|
498
|
+
}
|
|
499
|
+
|
|
500
|
+
std::vector<_ModelState> localData;
|
|
501
|
+
|
|
502
|
+
while(localRG.size() < numWorkers)
|
|
503
|
+
{
|
|
504
|
+
localRG.emplace_back(rg());
|
|
505
|
+
}
|
|
506
|
+
|
|
507
|
+
for (size_t i = 0; i < numWorkers; ++i)
|
|
508
|
+
{
|
|
509
|
+
if(ps == ParallelScheme::copy_merge) localData.emplace_back(static_cast<_Derived*>(this)->globalState);
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
if (ps == ParallelScheme::partition)
|
|
513
|
+
{
|
|
514
|
+
localData.resize(numWorkers);
|
|
515
|
+
static_cast<_Derived*>(this)->updatePartition(*cachedPool, globalState, localData.data(), docs.begin(), docs.end(),
|
|
516
|
+
static_cast<_Derived*>(this)->eddTrain);
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
auto state = ps == ParallelScheme::none ? &globalState : localData.data();
|
|
520
|
+
for (size_t i = 0; i < iteration; ++i)
|
|
521
|
+
{
|
|
522
|
+
while (1)
|
|
523
|
+
{
|
|
524
|
+
try
|
|
525
|
+
{
|
|
526
|
+
switch (ps)
|
|
527
|
+
{
|
|
528
|
+
case ParallelScheme::none:
|
|
529
|
+
static_cast<_Derived*>(this)->template trainOne<ParallelScheme::none>(
|
|
530
|
+
*cachedPool, state, localRG.data());
|
|
531
|
+
break;
|
|
532
|
+
case ParallelScheme::copy_merge:
|
|
533
|
+
static_cast<_Derived*>(this)->template trainOne<ParallelScheme::copy_merge>(
|
|
534
|
+
*cachedPool, state, localRG.data());
|
|
535
|
+
break;
|
|
536
|
+
case ParallelScheme::partition:
|
|
537
|
+
static_cast<_Derived*>(this)->template trainOne<ParallelScheme::partition>(
|
|
538
|
+
*cachedPool, state, localRG.data());
|
|
539
|
+
break;
|
|
540
|
+
}
|
|
541
|
+
break;
|
|
542
|
+
}
|
|
543
|
+
catch (const exception::TrainingError& e)
|
|
544
|
+
{
|
|
545
|
+
std::cerr << e.what() << std::endl;
|
|
546
|
+
int ret = static_cast<_Derived*>(this)->restoreFromTrainingError(
|
|
547
|
+
e, *cachedPool, state, localRG.data());
|
|
548
|
+
if(ret < 0) return ret;
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
++globalStep;
|
|
552
|
+
}
|
|
553
|
+
return 0;
|
|
554
|
+
}
|
|
555
|
+
|
|
556
|
+
double getLLPerWord() const override
|
|
557
|
+
{
|
|
558
|
+
return words.empty() ? 0 : static_cast<const _Derived*>(this)->getLL() / realN;
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
double getPerplexity() const override
|
|
562
|
+
{
|
|
563
|
+
return exp(-getLLPerWord());
|
|
564
|
+
}
|
|
565
|
+
|
|
566
|
+
size_t getK() const override
|
|
567
|
+
{
|
|
568
|
+
return 0;
|
|
569
|
+
}
|
|
570
|
+
|
|
571
|
+
std::vector<Float> getWidsByTopic(size_t tid) const override
|
|
572
|
+
{
|
|
573
|
+
return static_cast<const _Derived*>(this)->_getWidsByTopic(tid);
|
|
574
|
+
}
|
|
575
|
+
|
|
576
|
+
std::vector<std::pair<Vid, Float>> getWidsByTopicSorted(size_t tid, size_t topN) const
|
|
577
|
+
{
|
|
578
|
+
return extractTopN<Vid>(static_cast<const _Derived*>(this)->_getWidsByTopic(tid), topN);
|
|
579
|
+
}
|
|
580
|
+
|
|
581
|
+
std::vector<std::pair<std::string, Float>> vid2String(const std::vector<std::pair<Vid, Float>>& vids) const
|
|
582
|
+
{
|
|
583
|
+
std::vector<std::pair<std::string, Float>> ret(vids.size());
|
|
584
|
+
for (size_t i = 0; i < vids.size(); ++i)
|
|
585
|
+
{
|
|
586
|
+
ret[i] = std::make_pair(dict.toWord(vids[i].first), vids[i].second);
|
|
587
|
+
}
|
|
588
|
+
return ret;
|
|
589
|
+
}
|
|
590
|
+
|
|
591
|
+
std::vector<std::pair<std::string, Float>> getWordsByTopicSorted(size_t tid, size_t topN) const override
|
|
592
|
+
{
|
|
593
|
+
return vid2String(getWidsByTopicSorted(tid, topN));
|
|
594
|
+
}
|
|
595
|
+
|
|
596
|
+
std::vector<std::pair<Vid, Float>> getWidsByDocSorted(const DocumentBase* doc, size_t topN) const
|
|
597
|
+
{
|
|
598
|
+
std::vector<Float> cnt(dict.size());
|
|
599
|
+
for (auto w : doc->words) cnt[w] += 1;
|
|
600
|
+
for (auto& c : cnt) c /= doc->words.size();
|
|
601
|
+
return extractTopN<Vid>(cnt, topN);
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
std::vector<std::pair<std::string, Float>> getWordsByDocSorted(const DocumentBase* doc, size_t topN) const override
|
|
605
|
+
{
|
|
606
|
+
return vid2String(getWidsByDocSorted(doc, topN));
|
|
607
|
+
}
|
|
608
|
+
|
|
609
|
+
/*
 * Runs inference on `docs` by dispatching to the derived model's
 * _infer<together, scheme>() and returns its result.
 *
 * docs       : documents to infer; each pointer is treated as a DocType.
 * maxIter    : forwarded to _infer().
 * tolerance  : forwarded to _infer().
 * numWorkers : requested worker count; 0 means "use the hardware thread
 *              count" (at least 1).
 * ps         : requested parallel scheme, resolved through getRealScheme();
 *              with a single worker ParallelScheme::none is used.
 * together   : selects the first template argument of _infer().
 *
 * Throws exception::InvalidArgument when the resolved scheme is none of
 * none / copy_merge / partition.
 */
std::vector<double> infer(const std::vector<DocumentBase*>& docs, size_t maxIter, Float tolerance, size_t numWorkers, ParallelScheme ps, bool together) const override
{
	// hardware_concurrency() is allowed to return 0 when the value cannot be
	// determined; fall back to a single worker in that case.
	if (!numWorkers) numWorkers = std::max<size_t>(std::thread::hardware_concurrency(), 1);
	ps = getRealScheme(ps);
	if (numWorkers == 1) ps = ParallelScheme::none;

	// View the base-class pointers as references to the concrete document type.
	auto tx = [](DocumentBase* p)->DocType& { return *static_cast<DocType*>(p); };
	auto b = makeTransformIter(docs.begin(), tx), e = makeTransformIter(docs.end(), tx);

	if (together)
	{
		switch (ps)
		{
		case ParallelScheme::none:
			return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::none>(b, e, maxIter, tolerance, numWorkers);
		case ParallelScheme::copy_merge:
			return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::copy_merge>(b, e, maxIter, tolerance, numWorkers);
		case ParallelScheme::partition:
			return static_cast<const _Derived*>(this)->template _infer<true, ParallelScheme::partition>(b, e, maxIter, tolerance, numWorkers);
		}
	}
	else
	{
		switch (ps)
		{
		case ParallelScheme::none:
			return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::none>(b, e, maxIter, tolerance, numWorkers);
		case ParallelScheme::copy_merge:
			return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::copy_merge>(b, e, maxIter, tolerance, numWorkers);
		case ParallelScheme::partition:
			return static_cast<const _Derived*>(this)->template _infer<false, ParallelScheme::partition>(b, e, maxIter, tolerance, numWorkers);
		}
	}
	THROW_ERROR_WITH_INFO(exception::InvalidArgument, "invalid ParallelScheme");
}
|
|
643
|
+
|
|
644
|
+
std::vector<Float> getTopicsByDoc(const DocumentBase* doc) const override
|
|
645
|
+
{
|
|
646
|
+
return static_cast<const _Derived*>(this)->getTopicsByDoc(*static_cast<const DocType*>(doc));
|
|
647
|
+
}
|
|
648
|
+
|
|
649
|
+
std::vector<std::pair<Tid, Float>> getTopicsByDocSorted(const DocumentBase* doc, size_t topN) const override
|
|
650
|
+
{
|
|
651
|
+
return extractTopN<Tid>(getTopicsByDoc(doc), topN);
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
|
|
655
|
+
// Returns a pointer to the stored document at index `docId` (not range-checked).
const DocumentBase* getDoc(size_t docId) const override
{
	const DocType& doc = _getDoc(docId);
	return &doc;
}
|
|
659
|
+
|
|
660
|
+
size_t getGlobalStep() const override
|
|
661
|
+
{
|
|
662
|
+
return globalStep;
|
|
663
|
+
}
|
|
664
|
+
|
|
665
|
+
// Read-only access to the vocabulary dictionary.
const Dictionary& getVocabDict() const override
{
	return this->dict;
}
|
|
669
|
+
|
|
670
|
+
const std::vector<uint64_t>& getVocabCf() const override
|
|
671
|
+
{
|
|
672
|
+
return vocabCf;
|
|
673
|
+
}
|
|
674
|
+
|
|
675
|
+
const std::vector<uint64_t>& getVocabDf() const override
|
|
676
|
+
{
|
|
677
|
+
return vocabDf;
|
|
678
|
+
}
|
|
679
|
+
|
|
680
|
+
void saveModel(std::ostream& writer, bool fullModel, const std::vector<uint8_t>* extra_data) const override
|
|
681
|
+
{
|
|
682
|
+
static_cast<const _Derived*>(this)->_saveModel(writer, fullModel, extra_data);
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
void loadModel(std::istream& reader, std::vector<uint8_t>* extra_data) override
|
|
686
|
+
{
|
|
687
|
+
static_cast<_Derived*>(this)->_loadModel(reader, extra_data);
|
|
688
|
+
static_cast<_Derived*>(this)->prepare(false);
|
|
689
|
+
}
|
|
690
|
+
};
|
|
691
|
+
|
|
692
|
+
}
|