isotree 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/LICENSE.txt +2 -2
- data/README.md +41 -23
- data/ext/isotree/ext.cpp +144 -31
- data/ext/isotree/extconf.rb +7 -7
- data/lib/isotree/dataset.rb +0 -1
- data/lib/isotree/isolation_forest.rb +114 -30
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +165 -27
- data/vendor/isotree/include/isotree.hpp +2111 -0
- data/vendor/isotree/include/isotree_oop.hpp +394 -0
- data/vendor/isotree/inst/COPYRIGHTS +62 -0
- data/vendor/isotree/src/RcppExports.cpp +525 -52
- data/vendor/isotree/src/Rwrapper.cpp +1931 -268
- data/vendor/isotree/src/c_interface.cpp +953 -0
- data/vendor/isotree/src/crit.hpp +4232 -0
- data/vendor/isotree/src/dist.hpp +1886 -0
- data/vendor/isotree/src/exp_depth_table.hpp +134 -0
- data/vendor/isotree/src/extended.hpp +1444 -0
- data/vendor/isotree/src/external_facing_generic.hpp +399 -0
- data/vendor/isotree/src/fit_model.hpp +2401 -0
- data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
- data/vendor/isotree/src/helpers_iforest.hpp +813 -0
- data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
- data/vendor/isotree/src/indexer.cpp +515 -0
- data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
- data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
- data/vendor/isotree/src/isoforest.hpp +1659 -0
- data/vendor/isotree/src/isotree.hpp +1804 -392
- data/vendor/isotree/src/isotree_exportable.hpp +99 -0
- data/vendor/isotree/src/merge_models.cpp +159 -16
- data/vendor/isotree/src/mult.hpp +1321 -0
- data/vendor/isotree/src/oop_interface.cpp +842 -0
- data/vendor/isotree/src/oop_interface.hpp +278 -0
- data/vendor/isotree/src/other_helpers.hpp +219 -0
- data/vendor/isotree/src/predict.hpp +1932 -0
- data/vendor/isotree/src/python_helpers.hpp +134 -0
- data/vendor/isotree/src/ref_indexer.hpp +154 -0
- data/vendor/isotree/src/robinmap/LICENSE +21 -0
- data/vendor/isotree/src/robinmap/README.md +483 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
- data/vendor/isotree/src/serialize.cpp +4300 -139
- data/vendor/isotree/src/sql.cpp +141 -59
- data/vendor/isotree/src/subset_models.cpp +174 -0
- data/vendor/isotree/src/utils.hpp +3808 -0
- data/vendor/isotree/src/xoshiro.hpp +467 -0
- data/vendor/isotree/src/ziggurat.hpp +405 -0
- metadata +40 -106
- data/vendor/cereal/LICENSE +0 -24
- data/vendor/cereal/README.md +0 -85
- data/vendor/cereal/include/cereal/access.hpp +0 -351
- data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
- data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
- data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
- data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
- data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
- data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
- data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
- data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
- data/vendor/cereal/include/cereal/details/util.hpp +0 -84
- data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
- data/vendor/cereal/include/cereal/macros.hpp +0 -154
- data/vendor/cereal/include/cereal/specialize.hpp +0 -139
- data/vendor/cereal/include/cereal/types/array.hpp +0 -79
- data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
- data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
- data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
- data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
- data/vendor/cereal/include/cereal/types/common.hpp +0 -129
- data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
- data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
- data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
- data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
- data/vendor/cereal/include/cereal/types/list.hpp +0 -62
- data/vendor/cereal/include/cereal/types/map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
- data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
- data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
- data/vendor/cereal/include/cereal/types/set.hpp +0 -103
- data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
- data/vendor/cereal/include/cereal/types/string.hpp +0 -61
- data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
- data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
- data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
- data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
- data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
- data/vendor/cereal/include/cereal/version.hpp +0 -52
- data/vendor/isotree/src/Makevars +0 -4
- data/vendor/isotree/src/crit.cpp +0 -912
- data/vendor/isotree/src/dist.cpp +0 -749
- data/vendor/isotree/src/extended.cpp +0 -790
- data/vendor/isotree/src/fit_model.cpp +0 -1090
- data/vendor/isotree/src/helpers_iforest.cpp +0 -324
- data/vendor/isotree/src/isoforest.cpp +0 -771
- data/vendor/isotree/src/mult.cpp +0 -607
- data/vendor/isotree/src/predict.cpp +0 -853
- data/vendor/isotree/src/utils.cpp +0 -1566
@@ -1,853 +0,0 @@
|
|
1
|
-
/* Isolation forests and variations thereof, with adjustments for incorporation
|
2
|
-
* of categorical variables and missing values.
|
3
|
-
* Writen for C++11 standard and aimed at being used in R and Python.
|
4
|
-
*
|
5
|
-
* This library is based on the following works:
|
6
|
-
* [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
7
|
-
* "Isolation forest."
|
8
|
-
* 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008.
|
9
|
-
* [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
10
|
-
* "Isolation-based anomaly detection."
|
11
|
-
* ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3.
|
12
|
-
* [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner.
|
13
|
-
* "Extended Isolation Forest."
|
14
|
-
* arXiv preprint arXiv:1811.02141 (2018).
|
15
|
-
* [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
16
|
-
* "On detecting clustered anomalies using SCiForest."
|
17
|
-
* Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010.
|
18
|
-
* [5] https://sourceforge.net/projects/iforest/
|
19
|
-
* [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
|
20
|
-
* [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
|
21
|
-
* [8] Cortes, David. "Distance approximation using Isolation Forests." arXiv preprint arXiv:1910.12362 (2019).
|
22
|
-
* [9] Cortes, David. "Imputing missing values with unsupervised random trees." arXiv preprint arXiv:1911.06646 (2019).
|
23
|
-
*
|
24
|
-
* BSD 2-Clause License
|
25
|
-
* Copyright (c) 2020, David Cortes
|
26
|
-
* All rights reserved.
|
27
|
-
* Redistribution and use in source and binary forms, with or without
|
28
|
-
* modification, are permitted provided that the following conditions are met:
|
29
|
-
* * Redistributions of source code must retain the above copyright notice, this
|
30
|
-
* list of conditions and the following disclaimer.
|
31
|
-
* * Redistributions in binary form must reproduce the above copyright notice,
|
32
|
-
* this list of conditions and the following disclaimer in the documentation
|
33
|
-
* and/or other materials provided with the distribution.
|
34
|
-
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
35
|
-
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
36
|
-
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
37
|
-
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
38
|
-
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
39
|
-
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
40
|
-
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
41
|
-
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
42
|
-
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
43
|
-
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
44
|
-
*/
|
45
|
-
#include "isotree.hpp"
|
46
|
-
|
47
|
-
/* Predict outlier score, average depth, or terminal node numbers
|
48
|
-
*
|
49
|
-
* Parameters
|
50
|
-
* ==========
|
51
|
-
* - numeric_data[nrows * ncols_numeric]
|
52
|
-
* Pointer to numeric data for which to make predictions. Must be ordered by columns like Fortran,
|
53
|
-
* not ordered by rows like C (i.e. entries 1..n contain column 0, n+1..2n column 1, etc.),
|
54
|
-
* and the column order must be the same as in the data that was used to fit the model.
|
55
|
-
* Pass NULL if there are no dense numeric columns.
|
56
|
-
* Can only pass one of 'numeric_data', 'Xc' + 'Xc_ind' + 'Xc_indptr', 'Xr' + 'Xr_ind' + 'Xr_indptr'.
|
57
|
-
* - categ_data[nrows * ncols_categ]
|
58
|
-
* Pointer to categorical data for which to make predictions. Must be ordered by columns like Fortran,
|
59
|
-
* not ordered by rows like C (i.e. entries 1..n contain column 0, n+1..2n column 1, etc.),
|
60
|
-
* and the column order must be the same as in the data that was used to fit the model.
|
61
|
-
* Pass NULL if there are no categorical columns.
|
62
|
-
* Each category should be represented as an integer, and these integers must start at zero and
|
63
|
-
* be in consecutive order - i.e. if category '3' is present, category '2' must have also been
|
64
|
-
* present when the model was fit (note that they are not treated as being ordinal, this is just
|
65
|
-
* an encoding). Missing values should be encoded as negative numbers such as (-1). The encoding
|
66
|
-
* must be the same as was used in the data to which the model was fit.
|
67
|
-
* - Xc[nnz]
|
68
|
-
* Pointer to numeric data in sparse numeric matrix in CSC format (column-compressed).
|
69
|
-
* Pass NULL if there are no sparse numeric columns.
|
70
|
-
* Can only pass one of 'numeric_data', 'Xc' + 'Xc_ind' + 'Xc_indptr', 'Xr' + 'Xr_ind' + 'Xr_indptr'.
|
71
|
-
* - Xc_ind[nnz]
|
72
|
-
* Pointer to row indices to which each non-zero entry in 'Xc' corresponds.
|
73
|
-
* Pass NULL if there are no sparse numeric columns in CSC format.
|
74
|
-
* - Xc_indptr[ncols_categ + 1]
|
75
|
-
* Pointer to column index pointers that tell at entry [col] where does column 'col'
|
76
|
-
* start and at entry [col + 1] where does column 'col' end.
|
77
|
-
* Pass NULL if there are no sparse numeric columns in CSC format.
|
78
|
-
* - Xr[nnz]
|
79
|
-
* Pointer to numeric data in sparse numeric matrix in CSR format (row-compressed).
|
80
|
-
* Pass NULL if there are no sparse numeric columns.
|
81
|
-
* Can only pass one of 'numeric_data', 'Xc' + 'Xc_ind' + 'Xc_indptr', 'Xr' + 'Xr_ind' + 'Xr_indptr'.
|
82
|
-
* - Xr_ind[nnz]
|
83
|
-
* Pointer to column indices to which each non-zero entry in 'Xr' corresponds.
|
84
|
-
* Pass NULL if there are no sparse numeric columns in CSR format.
|
85
|
-
* - Xr_indptr[nrows + 1]
|
86
|
-
* Pointer to row index pointers that tell at entry [row] where does row 'row'
|
87
|
-
* start and at entry [row + 1] where does row 'row' end.
|
88
|
-
* Pass NULL if there are no sparse numeric columns in CSR format.
|
89
|
-
* - nrows
|
90
|
-
* Number of rows in 'numeric_data', 'Xc', 'Xr, 'categ_data'.
|
91
|
-
* - nthreads
|
92
|
-
* Number of parallel threads to use. Note that, the more threads, the more memory will be
|
93
|
-
* allocated, even if the thread does not end up being used. Ignored when not building with
|
94
|
-
* OpenMP support.
|
95
|
-
* - standardize
|
96
|
-
* Whether to standardize the average depths for each row according to their relative magnitude
|
97
|
-
* compared to the expected average, in order to obtain an outlier score. If passing 'false',
|
98
|
-
* will output the average depth instead.
|
99
|
-
* Ignored when not passing 'output_depths'.
|
100
|
-
* - model_outputs
|
101
|
-
* Pointer to fitted single-variable model object from function 'fit_iforest'. Pass NULL
|
102
|
-
* if the predictions are to be made from an extended model. Can only pass one of
|
103
|
-
* 'model_outputs' and 'model_outputs_ext'.
|
104
|
-
* - model_outputs_ext
|
105
|
-
* Pointer to fitted extended model object from function 'fit_iforest'. Pass NULL
|
106
|
-
* if the predictions are to be made from a single-variable model. Can only pass one of
|
107
|
-
* 'model_outputs' and 'model_outputs_ext'.
|
108
|
-
* - output_depths[nrows] (out)
|
109
|
-
* Pointer to array where the output average depths or outlier scores will be written into
|
110
|
-
* (the return type is control according to parameter 'standardize').
|
111
|
-
* Must already be initialized to zeros. Must also be passed and when the desired output
|
112
|
-
* is terminal node numbers.
|
113
|
-
* - tree_num[nrows * ntrees] (out)
|
114
|
-
* Pointer to array where the output terminal node numbers will be written into.
|
115
|
-
* Note that the mapping between tree node and terminal tree node is not stored in
|
116
|
-
* the model object for efficiency reasons, so this mapping will be determined on-the-fly
|
117
|
-
* when passing this parameter, and as such, there will be some overhead regardless of
|
118
|
-
* the actual number of rows. Pass NULL if only average depths or outlier scores are desired.
|
119
|
-
*/
|
120
|
-
void predict_iforest(double numeric_data[], int categ_data[],
|
121
|
-
double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
|
122
|
-
double Xr[], sparse_ix Xr_ind[], sparse_ix Xr_indptr[],
|
123
|
-
size_t nrows, int nthreads, bool standardize,
|
124
|
-
IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
|
125
|
-
double output_depths[], sparse_ix tree_num[])
|
126
|
-
{
|
127
|
-
/* put data in a struct for passing it in fewer lines */
|
128
|
-
PredictionData prediction_data = {numeric_data, categ_data, nrows,
|
129
|
-
Xc, Xc_ind, Xc_indptr,
|
130
|
-
Xr, Xr_ind, Xr_indptr};
|
131
|
-
|
132
|
-
if ((size_t)nthreads > nrows)
|
133
|
-
nthreads = nrows;
|
134
|
-
|
135
|
-
if (model_outputs != NULL)
|
136
|
-
{
|
137
|
-
if (
|
138
|
-
model_outputs->missing_action == Fail &&
|
139
|
-
(model_outputs->new_cat_action != Weighted || prediction_data.categ_data == NULL) &&
|
140
|
-
prediction_data.Xc_indptr == NULL && prediction_data.Xr_indptr == NULL
|
141
|
-
)
|
142
|
-
{
|
143
|
-
#pragma omp parallel for schedule(static) num_threads(nthreads) shared(nrows, model_outputs, prediction_data, output_depths, tree_num)
|
144
|
-
for (size_t_for row = 0; row < nrows; row++)
|
145
|
-
{
|
146
|
-
for (std::vector<IsoTree> &tree : model_outputs->trees)
|
147
|
-
{
|
148
|
-
traverse_itree_no_recurse(tree,
|
149
|
-
*model_outputs,
|
150
|
-
prediction_data,
|
151
|
-
output_depths[row],
|
152
|
-
(tree_num == NULL)? NULL : tree_num + nrows * (&tree - &(model_outputs->trees[0])),
|
153
|
-
(size_t) row);
|
154
|
-
}
|
155
|
-
}
|
156
|
-
}
|
157
|
-
|
158
|
-
else
|
159
|
-
{
|
160
|
-
#pragma omp parallel for schedule(static) num_threads(nthreads) shared(nrows, model_outputs, prediction_data, output_depths, tree_num)
|
161
|
-
for (size_t_for row = 0; row < nrows; row++)
|
162
|
-
{
|
163
|
-
for (std::vector<IsoTree> &tree : model_outputs->trees)
|
164
|
-
{
|
165
|
-
output_depths[row] += traverse_itree(tree,
|
166
|
-
*model_outputs,
|
167
|
-
prediction_data,
|
168
|
-
NULL, NULL, 0,
|
169
|
-
(size_t) row,
|
170
|
-
(tree_num == NULL)? NULL : tree_num + nrows * (&tree - &(model_outputs->trees[0])),
|
171
|
-
(size_t) 0);
|
172
|
-
}
|
173
|
-
}
|
174
|
-
}
|
175
|
-
}
|
176
|
-
|
177
|
-
|
178
|
-
else
|
179
|
-
{
|
180
|
-
if (
|
181
|
-
model_outputs_ext->missing_action == Fail &&
|
182
|
-
prediction_data.categ_data == NULL &&
|
183
|
-
prediction_data.Xc_indptr == NULL &&
|
184
|
-
prediction_data.Xr_indptr == NULL
|
185
|
-
)
|
186
|
-
{
|
187
|
-
#pragma omp parallel for schedule(static) num_threads(nthreads) shared(nrows, model_outputs_ext, prediction_data, output_depths, tree_num)
|
188
|
-
for (size_t_for row = 0; row < nrows; row++)
|
189
|
-
{
|
190
|
-
for (std::vector<IsoHPlane> &hplane : model_outputs_ext->hplanes)
|
191
|
-
{
|
192
|
-
traverse_hplane_fast(hplane,
|
193
|
-
*model_outputs_ext,
|
194
|
-
prediction_data,
|
195
|
-
output_depths[row],
|
196
|
-
(tree_num == NULL)? NULL : tree_num + nrows * (&hplane - &(model_outputs_ext->hplanes[0])),
|
197
|
-
(size_t) row);
|
198
|
-
}
|
199
|
-
}
|
200
|
-
}
|
201
|
-
|
202
|
-
else
|
203
|
-
{
|
204
|
-
#pragma omp parallel for schedule(static) num_threads(nthreads) shared(nrows, model_outputs_ext, prediction_data, output_depths, tree_num)
|
205
|
-
for (size_t_for row = 0; row < nrows; row++)
|
206
|
-
{
|
207
|
-
for (std::vector<IsoHPlane> &hplane : model_outputs_ext->hplanes)
|
208
|
-
{
|
209
|
-
traverse_hplane(hplane,
|
210
|
-
*model_outputs_ext,
|
211
|
-
prediction_data,
|
212
|
-
output_depths[row],
|
213
|
-
NULL, NULL,
|
214
|
-
(tree_num == NULL)? NULL : tree_num + nrows * (&hplane - &(model_outputs_ext->hplanes[0])),
|
215
|
-
(size_t) row);
|
216
|
-
}
|
217
|
-
}
|
218
|
-
}
|
219
|
-
}
|
220
|
-
|
221
|
-
/* translate sum-of-depths to outlier score */
|
222
|
-
double ntrees, depth_divisor;
|
223
|
-
if (model_outputs != NULL)
|
224
|
-
{
|
225
|
-
ntrees = (double) model_outputs->trees.size();
|
226
|
-
depth_divisor = ntrees * (model_outputs->exp_avg_depth);
|
227
|
-
}
|
228
|
-
|
229
|
-
else
|
230
|
-
{
|
231
|
-
ntrees = (double) model_outputs_ext->hplanes.size();
|
232
|
-
depth_divisor = ntrees * (model_outputs_ext->exp_avg_depth);
|
233
|
-
}
|
234
|
-
|
235
|
-
if (standardize)
|
236
|
-
#pragma omp parallel for schedule(static) num_threads(nthreads) shared(nrows, output_depths, depth_divisor)
|
237
|
-
for (size_t_for row = 0; row < nrows; row++)
|
238
|
-
output_depths[row] = exp2( - output_depths[row] / depth_divisor );
|
239
|
-
else
|
240
|
-
#pragma omp parallel for schedule(static) num_threads(nthreads) shared(nrows, output_depths, ntrees)
|
241
|
-
for (size_t_for row = 0; row < nrows; row++)
|
242
|
-
output_depths[row] /= ntrees;
|
243
|
-
|
244
|
-
|
245
|
-
/* re-map tree numbers to start at zero (if predicting tree numbers) */
|
246
|
-
/* Note: usually this type of 'prediction' is not required,
|
247
|
-
thus this mapping is not stored in the model objects so as to
|
248
|
-
save memory */
|
249
|
-
if (tree_num != NULL)
|
250
|
-
remap_terminal_trees(model_outputs, model_outputs_ext,
|
251
|
-
prediction_data, tree_num, nthreads);
|
252
|
-
}
|
253
|
-
|
254
|
-
|
255
|
-
/* TODO: these functions would be faster if done with row-major order,
|
256
|
-
should at least give the option of taking arrays as row-major. */
|
257
|
-
void traverse_itree_no_recurse(std::vector<IsoTree> &tree,
|
258
|
-
IsoForest &model_outputs,
|
259
|
-
PredictionData &prediction_data,
|
260
|
-
double &output_depth,
|
261
|
-
sparse_ix *restrict tree_num,
|
262
|
-
size_t row)
|
263
|
-
{
|
264
|
-
size_t curr_lev = 0;
|
265
|
-
double xval;
|
266
|
-
while (true)
|
267
|
-
{
|
268
|
-
if (tree[curr_lev].score > 0)
|
269
|
-
{
|
270
|
-
output_depth += tree[curr_lev].score;
|
271
|
-
if (tree_num != NULL)
|
272
|
-
tree_num[row] = curr_lev;
|
273
|
-
break;
|
274
|
-
}
|
275
|
-
|
276
|
-
else
|
277
|
-
{
|
278
|
-
switch(tree[curr_lev].col_type)
|
279
|
-
{
|
280
|
-
case Numeric:
|
281
|
-
{
|
282
|
-
xval = prediction_data.numeric_data[row + tree[curr_lev].col_num * prediction_data.nrows];
|
283
|
-
curr_lev = (xval <= tree[curr_lev].num_split)?
|
284
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
285
|
-
output_depth += (xval < tree[curr_lev].range_low) || (xval > tree[curr_lev].range_high);
|
286
|
-
break;
|
287
|
-
}
|
288
|
-
|
289
|
-
case Categorical:
|
290
|
-
{
|
291
|
-
switch(model_outputs.cat_split_type)
|
292
|
-
{
|
293
|
-
case SubSet:
|
294
|
-
{
|
295
|
-
|
296
|
-
if (!tree[curr_lev].cat_split.size()) /* this is for binary columns */
|
297
|
-
{
|
298
|
-
if (prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows] <= 1)
|
299
|
-
{
|
300
|
-
curr_lev = (
|
301
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
302
|
-
== 0
|
303
|
-
)?
|
304
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
305
|
-
}
|
306
|
-
|
307
|
-
else /* can only work with 'Smallest' + no NAs if reaching this point */
|
308
|
-
{
|
309
|
-
curr_lev = (tree[curr_lev].pct_tree_left < .5)? tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
310
|
-
}
|
311
|
-
}
|
312
|
-
|
313
|
-
else
|
314
|
-
{
|
315
|
-
|
316
|
-
switch(model_outputs.new_cat_action)
|
317
|
-
{
|
318
|
-
case Random:
|
319
|
-
{
|
320
|
-
curr_lev = (tree[curr_lev].cat_split[
|
321
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
322
|
-
]
|
323
|
-
)?
|
324
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
325
|
-
break;
|
326
|
-
}
|
327
|
-
|
328
|
-
case Smallest:
|
329
|
-
{
|
330
|
-
if (
|
331
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
332
|
-
>= (int)tree[curr_lev].cat_split.size()
|
333
|
-
)
|
334
|
-
{
|
335
|
-
curr_lev = (tree[curr_lev].pct_tree_left < .5)? tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
336
|
-
}
|
337
|
-
|
338
|
-
else
|
339
|
-
{
|
340
|
-
curr_lev = (tree[curr_lev].cat_split[
|
341
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
342
|
-
]
|
343
|
-
)?
|
344
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
345
|
-
}
|
346
|
-
break;
|
347
|
-
}
|
348
|
-
}
|
349
|
-
}
|
350
|
-
break;
|
351
|
-
}
|
352
|
-
|
353
|
-
case SingleCateg:
|
354
|
-
{
|
355
|
-
curr_lev = (
|
356
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
357
|
-
==
|
358
|
-
tree[curr_lev].chosen_cat
|
359
|
-
)?
|
360
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
361
|
-
break;
|
362
|
-
}
|
363
|
-
}
|
364
|
-
break;
|
365
|
-
}
|
366
|
-
}
|
367
|
-
}
|
368
|
-
}
|
369
|
-
}
|
370
|
-
|
371
|
-
|
372
|
-
double traverse_itree(std::vector<IsoTree> &tree,
|
373
|
-
IsoForest &model_outputs,
|
374
|
-
PredictionData &prediction_data,
|
375
|
-
std::vector<ImputeNode> *impute_nodes, /* only when imputing missing */
|
376
|
-
ImputedData *imputed_data, /* only when imputing missing */
|
377
|
-
double curr_weight, /* only when imputing missing */
|
378
|
-
size_t row,
|
379
|
-
sparse_ix *restrict tree_num,
|
380
|
-
size_t curr_lev)
|
381
|
-
{
|
382
|
-
double xval;
|
383
|
-
double range_penalty = 0;
|
384
|
-
|
385
|
-
sparse_ix *row_st = NULL, *row_end = NULL;
|
386
|
-
if (prediction_data.Xr_indptr != NULL)
|
387
|
-
{
|
388
|
-
row_st = prediction_data.Xr_ind + prediction_data.Xr_indptr[row];
|
389
|
-
row_end = prediction_data.Xr_ind + prediction_data.Xr_indptr[row + 1];
|
390
|
-
}
|
391
|
-
|
392
|
-
while (true)
|
393
|
-
{
|
394
|
-
if (tree[curr_lev].score >= 0.)
|
395
|
-
{
|
396
|
-
if (tree_num != NULL)
|
397
|
-
tree_num[row] = curr_lev;
|
398
|
-
if (imputed_data != NULL)
|
399
|
-
add_from_impute_node((*impute_nodes)[curr_lev], *imputed_data, curr_weight);
|
400
|
-
|
401
|
-
return tree[curr_lev].score + range_penalty;
|
402
|
-
}
|
403
|
-
|
404
|
-
else
|
405
|
-
{
|
406
|
-
switch(tree[curr_lev].col_type)
|
407
|
-
{
|
408
|
-
case Numeric:
|
409
|
-
{
|
410
|
-
|
411
|
-
if (prediction_data.Xc_indptr == NULL && prediction_data.Xr_indptr == NULL)
|
412
|
-
xval = prediction_data.numeric_data[row + tree[curr_lev].col_num * prediction_data.nrows];
|
413
|
-
else if (prediction_data.Xc_indptr != NULL)
|
414
|
-
xval = extract_spC(prediction_data, row, tree[curr_lev].col_num);
|
415
|
-
else
|
416
|
-
xval = extract_spR(prediction_data, row_st, row_end, tree[curr_lev].col_num);
|
417
|
-
|
418
|
-
if (isnan(xval))
|
419
|
-
{
|
420
|
-
switch(model_outputs.missing_action)
|
421
|
-
{
|
422
|
-
case Divide:
|
423
|
-
{
|
424
|
-
return
|
425
|
-
tree[curr_lev].pct_tree_left
|
426
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
427
|
-
impute_nodes, imputed_data, curr_weight * tree[curr_lev].pct_tree_left,
|
428
|
-
row, NULL, tree[curr_lev].tree_left)
|
429
|
-
+ (1 - tree[curr_lev].pct_tree_left)
|
430
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
431
|
-
impute_nodes, imputed_data, curr_weight * (1 - tree[curr_lev].pct_tree_left),
|
432
|
-
row, NULL, tree[curr_lev].tree_right)
|
433
|
-
+ range_penalty;
|
434
|
-
}
|
435
|
-
|
436
|
-
case Impute:
|
437
|
-
{
|
438
|
-
curr_lev = (tree[curr_lev].pct_tree_left >= .5)?
|
439
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
440
|
-
break;
|
441
|
-
}
|
442
|
-
|
443
|
-
case Fail:
|
444
|
-
{
|
445
|
-
return NAN;
|
446
|
-
}
|
447
|
-
}
|
448
|
-
}
|
449
|
-
|
450
|
-
else
|
451
|
-
{
|
452
|
-
curr_lev = (xval <=tree[curr_lev].num_split)?
|
453
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
454
|
-
range_penalty += (xval < tree[curr_lev].range_low) || (xval > tree[curr_lev].range_high);
|
455
|
-
}
|
456
|
-
break;
|
457
|
-
}
|
458
|
-
|
459
|
-
case Categorical:
|
460
|
-
{
|
461
|
-
|
462
|
-
if (prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows] < 0)
|
463
|
-
{
|
464
|
-
switch(model_outputs.missing_action)
|
465
|
-
{
|
466
|
-
case Divide:
|
467
|
-
{
|
468
|
-
return
|
469
|
-
tree[curr_lev].pct_tree_left
|
470
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
471
|
-
impute_nodes, imputed_data, curr_weight * tree[curr_lev].pct_tree_left,
|
472
|
-
row, NULL, tree[curr_lev].tree_left)
|
473
|
-
+ (1 - tree[curr_lev].pct_tree_left)
|
474
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
475
|
-
impute_nodes, imputed_data, curr_weight * (1 - tree[curr_lev].pct_tree_left),
|
476
|
-
row, NULL, tree[curr_lev].tree_right)
|
477
|
-
+ range_penalty;
|
478
|
-
}
|
479
|
-
|
480
|
-
case Impute:
|
481
|
-
{
|
482
|
-
curr_lev = (tree[curr_lev].pct_tree_left >= .5)?
|
483
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
484
|
-
break;
|
485
|
-
}
|
486
|
-
|
487
|
-
case Fail:
|
488
|
-
{
|
489
|
-
return NAN;
|
490
|
-
}
|
491
|
-
}
|
492
|
-
}
|
493
|
-
|
494
|
-
else
|
495
|
-
{
|
496
|
-
switch(model_outputs.cat_split_type)
|
497
|
-
{
|
498
|
-
case SingleCateg:
|
499
|
-
{
|
500
|
-
curr_lev = (
|
501
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
502
|
-
==
|
503
|
-
tree[curr_lev].chosen_cat
|
504
|
-
)?
|
505
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
506
|
-
break;
|
507
|
-
}
|
508
|
-
|
509
|
-
case SubSet:
|
510
|
-
{
|
511
|
-
|
512
|
-
if (!tree[curr_lev].cat_split.size())
|
513
|
-
{
|
514
|
-
if (prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows] <= 1)
|
515
|
-
{
|
516
|
-
curr_lev = (
|
517
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
518
|
-
== 0
|
519
|
-
)?
|
520
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
521
|
-
}
|
522
|
-
|
523
|
-
else
|
524
|
-
{
|
525
|
-
switch(model_outputs.new_cat_action)
|
526
|
-
{
|
527
|
-
case Smallest:
|
528
|
-
{
|
529
|
-
curr_lev = (tree[curr_lev].pct_tree_left < .5)? tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
530
|
-
break;
|
531
|
-
}
|
532
|
-
|
533
|
-
case Weighted:
|
534
|
-
{
|
535
|
-
return
|
536
|
-
tree[curr_lev].pct_tree_left
|
537
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
538
|
-
impute_nodes, imputed_data, curr_weight * tree[curr_lev].pct_tree_left,
|
539
|
-
row, NULL, tree[curr_lev].tree_left)
|
540
|
-
+ (1 - tree[curr_lev].pct_tree_left)
|
541
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
542
|
-
impute_nodes, imputed_data, curr_weight * (1 - tree[curr_lev].pct_tree_left),
|
543
|
-
row, NULL, tree[curr_lev].tree_right)
|
544
|
-
+ range_penalty;
|
545
|
-
}
|
546
|
-
}
|
547
|
-
}
|
548
|
-
}
|
549
|
-
|
550
|
-
else
|
551
|
-
{
|
552
|
-
switch(model_outputs.new_cat_action)
|
553
|
-
{
|
554
|
-
case Random:
|
555
|
-
{
|
556
|
-
curr_lev = (tree[curr_lev].cat_split[
|
557
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
558
|
-
]
|
559
|
-
)?
|
560
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
561
|
-
break;
|
562
|
-
}
|
563
|
-
|
564
|
-
case Smallest:
|
565
|
-
{
|
566
|
-
if (
|
567
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
568
|
-
>= (int)tree[curr_lev].cat_split.size()
|
569
|
-
)
|
570
|
-
{
|
571
|
-
curr_lev = (tree[curr_lev].pct_tree_left < .5)? tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
572
|
-
}
|
573
|
-
|
574
|
-
else
|
575
|
-
{
|
576
|
-
curr_lev = (tree[curr_lev].cat_split[
|
577
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
578
|
-
]
|
579
|
-
)?
|
580
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
581
|
-
}
|
582
|
-
break;
|
583
|
-
}
|
584
|
-
|
585
|
-
case Weighted:
|
586
|
-
{
|
587
|
-
if (
|
588
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
589
|
-
>= (int)tree[curr_lev].cat_split.size()
|
590
|
-
||
|
591
|
-
tree[curr_lev].cat_split[
|
592
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
593
|
-
]
|
594
|
-
== (-1)
|
595
|
-
)
|
596
|
-
{
|
597
|
-
return
|
598
|
-
tree[curr_lev].pct_tree_left
|
599
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
600
|
-
impute_nodes, imputed_data, curr_weight * tree[curr_lev].pct_tree_left,
|
601
|
-
row, NULL, tree[curr_lev].tree_left)
|
602
|
-
+ (1 - tree[curr_lev].pct_tree_left)
|
603
|
-
* traverse_itree(tree, model_outputs, prediction_data,
|
604
|
-
impute_nodes, imputed_data, curr_weight * (1 - tree[curr_lev].pct_tree_left),
|
605
|
-
row, NULL, tree[curr_lev].tree_right)
|
606
|
-
+ range_penalty;
|
607
|
-
}
|
608
|
-
|
609
|
-
else
|
610
|
-
{
|
611
|
-
curr_lev = (tree[curr_lev].cat_split[
|
612
|
-
prediction_data.categ_data[row + tree[curr_lev].col_num * prediction_data.nrows]
|
613
|
-
]
|
614
|
-
)?
|
615
|
-
tree[curr_lev].tree_left : tree[curr_lev].tree_right;
|
616
|
-
}
|
617
|
-
break;
|
618
|
-
}
|
619
|
-
}
|
620
|
-
}
|
621
|
-
break;
|
622
|
-
}
|
623
|
-
}
|
624
|
-
}
|
625
|
-
break;
|
626
|
-
}
|
627
|
-
}
|
628
|
-
}
|
629
|
-
}
|
630
|
-
}
|
631
|
-
|
632
|
-
/* this is a simpler version for situations in which there is
|
633
|
-
only numeric data in dense arrays and no missing values */
|
634
|
-
void traverse_hplane_fast(std::vector<IsoHPlane> &hplane,
|
635
|
-
ExtIsoForest &model_outputs,
|
636
|
-
PredictionData &prediction_data,
|
637
|
-
double &output_depth,
|
638
|
-
sparse_ix *restrict tree_num,
|
639
|
-
size_t row)
|
640
|
-
{
|
641
|
-
size_t curr_lev = 0;
|
642
|
-
double hval;
|
643
|
-
|
644
|
-
while(true)
|
645
|
-
{
|
646
|
-
if (hplane[curr_lev].score > 0)
|
647
|
-
{
|
648
|
-
output_depth += hplane[curr_lev].score;
|
649
|
-
if (tree_num != NULL)
|
650
|
-
tree_num[row] = curr_lev;
|
651
|
-
return;
|
652
|
-
}
|
653
|
-
|
654
|
-
else
|
655
|
-
{
|
656
|
-
hval = 0;
|
657
|
-
for (size_t col = 0; col < hplane[curr_lev].col_num.size(); col++)
|
658
|
-
hval += (prediction_data.numeric_data[row + hplane[curr_lev].col_num[col] * prediction_data.nrows]
|
659
|
-
- hplane[curr_lev].mean[col]) * hplane[curr_lev].coef[col];
|
660
|
-
}
|
661
|
-
|
662
|
-
output_depth += (hval < hplane[curr_lev].range_low) ||
|
663
|
-
(hval > hplane[curr_lev].range_high);
|
664
|
-
curr_lev = (hval <= hplane[curr_lev].split_point)?
|
665
|
-
hplane[curr_lev].hplane_left : hplane[curr_lev].hplane_right;
|
666
|
-
}
|
667
|
-
}
|
668
|
-
|
669
|
-
/* this is the full version that works with potentially missing values, sparse matrices, and categoricals */
/* Traverses one tree of the extended model for one row, accumulating the
   isolation depth into 'output_depth'. Handles dense, CSC ('Xc_*') and CSR
   ('Xr_*') numeric inputs, categorical columns, and missing values according
   to 'model_outputs.missing_action'. When 'imputed_data' is not NULL, also
   accumulates imputation statistics from the terminal node reached. */
void traverse_hplane(std::vector<IsoHPlane> &hplane,
                     ExtIsoForest &model_outputs,
                     PredictionData &prediction_data,
                     double &output_depth,
                     std::vector<ImputeNode> *impute_nodes, /* only when imputing missing */
                     ImputedData *imputed_data, /* only when imputing missing */
                     sparse_ix *restrict tree_num,
                     size_t row)
{
    size_t curr_lev = 0;   /* current node index (root = 0) */
    double xval;           /* numeric value extracted for the current column */
    int cval;              /* categorical value for the current column */
    double hval;           /* running projection onto the node's hyperplane */

    /* per-node positions into the node's numeric / categorical coefficient arrays */
    size_t ncols_numeric, ncols_categ;

    /* for CSR input, pre-locate this row's slice of the column-index array */
    sparse_ix *row_st = NULL, *row_end = NULL;
    if (prediction_data.Xr_indptr != NULL)
    {
        row_st = prediction_data.Xr_ind + prediction_data.Xr_indptr[row];
        row_end = prediction_data.Xr_ind + prediction_data.Xr_indptr[row + 1];
    }

    while(true)
    {
        /* terminal node: add score, optionally record node and impute, then stop */
        if (hplane[curr_lev].score > 0)
        {
            output_depth += hplane[curr_lev].score;
            if (tree_num != NULL)
                tree_num[row] = curr_lev;
            if (imputed_data != NULL)
            {
                add_from_impute_node((*impute_nodes)[curr_lev], *imputed_data, (double)1);
            }
            return;
        }

        else
        {
            /* internal node: build the hyperplane projection column-by-column */
            hval = 0;
            ncols_numeric = 0; ncols_categ = 0;
            for (size_t col = 0; col < hplane[curr_lev].col_num.size(); col++)
            {
                switch(hplane[curr_lev].col_type[col])
                {
                    case Numeric:
                    {
                        /* fetch the value from whichever representation is present:
                           dense array, CSC column lookup, or CSR row lookup */
                        if (prediction_data.Xc_indptr == NULL && prediction_data.Xr_indptr == NULL)
                            xval = prediction_data.numeric_data[row + hplane[curr_lev].col_num[col] * prediction_data.nrows];
                        else if (prediction_data.Xc_indptr != NULL)
                            xval = extract_spC(prediction_data, row, hplane[curr_lev].col_num[col]);
                        else
                            xval = extract_spR(prediction_data, row_st, row_end, hplane[curr_lev].col_num[col]);

                        if (is_na_or_inf(xval))
                        {
                            /* missing numeric: substitute the fit-time fill value,
                               or abort the whole row with NAN under 'Fail' */
                            if (model_outputs.missing_action != Fail)
                            {
                                hval += hplane[curr_lev].fill_val[col];
                            }

                            else
                            {
                                output_depth = NAN;
                                return;
                            }
                        }

                        else
                        {
                            /* note: mean/coef are indexed by the numeric-column counter,
                               not by 'col', since columns of both types interleave */
                            hval += (xval - hplane[curr_lev].mean[ncols_numeric]) * hplane[curr_lev].coef[ncols_numeric];
                        }

                        ncols_numeric++;
                        break;
                    }

                    case Categorical:
                    {
                        cval = prediction_data.categ_data[row + hplane[curr_lev].col_num[col] * prediction_data.nrows];
                        if (cval < 0)
                        {
                            /* negative category encodes missing: same policy as numeric */
                            if (model_outputs.missing_action != Fail)
                            {
                                hval += hplane[curr_lev].fill_val[col];
                            }

                            else
                            {
                                output_depth = NAN;
                                return;
                            }
                        }

                        else
                        {
                            switch(model_outputs.cat_split_type)
                            {
                                case SingleCateg:
                                {
                                    /* only the single chosen category contributes */
                                    hval += (cval == hplane[curr_lev].chosen_cat[ncols_categ])? hplane[curr_lev].fill_new[ncols_categ] : 0;
                                    break;
                                }

                                case SubSet:
                                {
                                    /* categories unseen at fit time fall back to 'fill_new' */
                                    if (cval >= (int)hplane[curr_lev].cat_coef[ncols_categ].size())
                                        hval += hplane[curr_lev].fill_new[ncols_categ];
                                    else
                                        hval += hplane[curr_lev].cat_coef[ncols_categ][cval];
                                    break;
                                }
                            }
                        }

                        ncols_categ++;
                        break;
                    }
                }

            }

            /* +1 depth penalty when the projection is outside the fit-time range,
               then descend according to the split point */
            output_depth += (hval < hplane[curr_lev].range_low) ||
                            (hval > hplane[curr_lev].range_high);
            curr_lev = (hval <= hplane[curr_lev].split_point)?
                        hplane[curr_lev].hplane_left : hplane[curr_lev].hplane_right;
        }
    }
}
|
799
|
-
|
800
|
-
/* Extracts one value from a CSC (column-major sparse) matrix.
   Binary-searches this column's slice of the row-index array ('Xc_ind',
   delimited by 'Xc_indptr[col_num]' / 'Xc_indptr[col_num+1]') for 'row';
   entries absent from the sparse structure are implicit zeros. */
double extract_spC(PredictionData &prediction_data, size_t row, size_t col_num)
{
    sparse_ix *search_res = std::lower_bound(prediction_data.Xc_ind + prediction_data.Xc_indptr[col_num],
                                             prediction_data.Xc_ind + prediction_data.Xc_indptr[col_num + 1],
                                             (sparse_ix) row);
    if (
        search_res == (prediction_data.Xc_ind + prediction_data.Xc_indptr[col_num + 1])
            ||
        /* explicit cast mirrors extract_spR and avoids an implicit
           signed/unsigned comparison between sparse_ix and size_t */
        *search_res != (sparse_ix) row
    )
        return 0.;
    else
        return prediction_data.Xc[search_res - prediction_data.Xc_ind];
}
|
814
|
-
|
815
|
-
/* Extracts one value from a CSR (row-major sparse) matrix.
   'row_st' / 'row_end' delimit this row's slice of the column-index array
   ('Xr_ind'); columns absent from the sparse structure are implicit zeros. */
double extract_spR(PredictionData &prediction_data, sparse_ix *row_st, sparse_ix *row_end, size_t col_num)
{
    if (row_st == row_end)
        return 0.;                       /* empty row: everything is zero */
    sparse_ix target = (sparse_ix) col_num;
    sparse_ix *pos = std::lower_bound(row_st, row_end, target);
    bool present = (pos != row_end) && (*pos == target);
    return present? prediction_data.Xr[pos - prediction_data.Xr_ind] : 0.;
}
|
825
|
-
|
826
|
-
/* Fills, for each tree of a single-variable model, the total node count
   ('n_nodes') and the number of terminal nodes ('n_terminal'). Both output
   arrays must have one slot per tree. Work is parallelized across trees. */
void get_num_nodes(IsoForest &model_outputs, sparse_ix *restrict n_nodes, sparse_ix *restrict n_terminal, int nthreads)
{
    std::fill(n_terminal, n_terminal + model_outputs.trees.size(), 0);
    #pragma omp parallel for schedule(static) num_threads(nthreads) shared(model_outputs, n_nodes, n_terminal)
    for (size_t_for tree = 0; tree < model_outputs.trees.size(); tree++)
    {
        std::vector<IsoTree> &nodes = model_outputs.trees[tree];
        n_nodes[tree] = nodes.size();
        sparse_ix terminal_count = 0;
        for (IsoTree &node : nodes)
            terminal_count += (node.score > 0);  /* positive score marks a terminal node */
        n_terminal[tree] = terminal_count;
    }
}
|
839
|
-
|
840
|
-
/* Fills, for each tree of an extended (hyperplane) model, the total node
   count ('n_nodes') and the number of terminal nodes ('n_terminal'). Both
   output arrays must have one slot per tree. Parallelized across trees. */
void get_num_nodes(ExtIsoForest &model_outputs, sparse_ix *restrict n_nodes, sparse_ix *restrict n_terminal, int nthreads)
{
    std::fill(n_terminal, n_terminal + model_outputs.hplanes.size(), 0);
    #pragma omp parallel for schedule(static) num_threads(nthreads) shared(model_outputs, n_nodes, n_terminal)
    for (size_t_for hplane = 0; hplane < model_outputs.hplanes.size(); hplane++)
    {
        std::vector<IsoHPlane> &nodes = model_outputs.hplanes[hplane];
        n_nodes[hplane] = nodes.size();
        for (size_t ix = 0; ix < nodes.size(); ix++)
            if (nodes[ix].score > 0)             /* positive score marks a terminal node */
                n_terminal[hplane]++;
    }
}
|
853
|
-
|