outliertree 0.1.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/NOTICE.txt +1 -1
- data/README.md +11 -10
- data/ext/outliertree/ext.cpp +104 -105
- data/ext/outliertree/extconf.rb +1 -1
- data/lib/outliertree/result.rb +3 -3
- data/lib/outliertree/version.rb +1 -1
- data/vendor/outliertree/README.md +77 -40
- data/vendor/outliertree/src/Makevars.in +4 -0
- data/vendor/outliertree/src/Makevars.win +4 -0
- data/vendor/outliertree/src/RcppExports.cpp +20 -9
- data/vendor/outliertree/src/Rwrapper.cpp +256 -57
- data/vendor/outliertree/src/cat_outlier.cpp +6 -6
- data/vendor/outliertree/src/clusters.cpp +114 -9
- data/vendor/outliertree/src/fit_model.cpp +505 -308
- data/vendor/outliertree/src/misc.cpp +165 -4
- data/vendor/outliertree/src/outlier_tree.hpp +159 -51
- data/vendor/outliertree/src/outliertree-win.def +3 -0
- data/vendor/outliertree/src/predict.cpp +33 -0
- data/vendor/outliertree/src/split.cpp +124 -20
- metadata +10 -8
- data/vendor/outliertree/src/Makevars +0 -3
| @@ -5,14 +5,19 @@ | |
| 5 5 |  | 
| 6 6 | 
             
            using namespace Rcpp;
         | 
| 7 7 |  | 
| 8 | 
            +
            #ifdef RCPP_USE_GLOBAL_ROSTREAM
         | 
| 9 | 
            +
            Rcpp::Rostream<true>&  Rcpp::Rcout = Rcpp::Rcpp_cout_get();
         | 
| 10 | 
            +
            Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
         | 
| 11 | 
            +
            #endif
         | 
| 12 | 
            +
             | 
| 8 13 | 
             
            /* C-callable entry point around deserialize_OutlierTree().
               In this diff the declaration and the wrapper gain a second argument
               (ptr_objSEXP, converted to 'ptr_obj' and forwarded), and the
               Rcpp::RNGScope local is removed from the wrapper body. */
            // deserialize_OutlierTree
         | 
| 9 | 
            -
            SEXP deserialize_OutlierTree(Rcpp::RawVector src);
         | 
| 10 | 
            -
            RcppExport SEXP _outliertree_deserialize_OutlierTree(SEXP srcSEXP) {
         | 
| 14 | 
            +
            SEXP deserialize_OutlierTree(Rcpp::RawVector src, SEXP ptr_obj);
         | 
| 15 | 
            +
            RcppExport SEXP _outliertree_deserialize_OutlierTree(SEXP srcSEXP, SEXP ptr_objSEXP) {
         | 
| 11 16 | 
             
            BEGIN_RCPP
         | 
| 12 17 | 
             
                Rcpp::RObject rcpp_result_gen;
         | 
| 13 | 
            -
                Rcpp::RNGScope rcpp_rngScope_gen;
         | 
| 14 18 | 
             
                /* convert the incoming SEXP into the Rcpp::RawVector the callee expects */
                Rcpp::traits::input_parameter< Rcpp::RawVector >::type src(srcSEXP);
         | 
| 15 | 
            -
                 | 
| 19 | 
            +
                Rcpp::traits::input_parameter< SEXP >::type ptr_obj(ptr_objSEXP);
         | 
| 20 | 
            +
                /* call the real function and wrap whatever it returns as the result object */
                rcpp_result_gen = Rcpp::wrap(deserialize_OutlierTree(src, ptr_obj));
         | 
| 16 21 | 
             
                return rcpp_result_gen;
         | 
| 17 22 | 
             
            END_RCPP
         | 
| 18 23 | 
             
            }
         | 
| @@ -21,7 +26,6 @@ Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model); | |
| 21 26 | 
             
            RcppExport SEXP _outliertree_check_null_ptr_model(SEXP ptr_modelSEXP) {
         | 
| 22 27 | 
             
            BEGIN_RCPP
         | 
| 23 28 | 
             
                Rcpp::RObject rcpp_result_gen;
         | 
| 24 | 
            -
                Rcpp::RNGScope rcpp_rngScope_gen;
         | 
| 25 29 | 
             
                Rcpp::traits::input_parameter< SEXP >::type ptr_model(ptr_modelSEXP);
         | 
| 26 30 | 
             
                rcpp_result_gen = Rcpp::wrap(check_null_ptr_model(ptr_model));
         | 
| 27 31 | 
             
                return rcpp_result_gen;
         | 
| @@ -32,7 +36,6 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric, Rc | |
| 32 36 | 
             
            RcppExport SEXP _outliertree_fit_OutlierTree(SEXP arr_numSEXP, SEXP ncols_numericSEXP, SEXP arr_catSEXP, SEXP ncols_categSEXP, SEXP ncatSEXP, SEXP arr_ordSEXP, SEXP ncols_ordSEXP, SEXP ncat_ordSEXP, SEXP nrowsSEXP, SEXP cols_ignore_rSEXP, SEXP nthreadsSEXP, SEXP categ_as_binSEXP, SEXP ord_as_binSEXP, SEXP cat_bruteforce_subsetSEXP, SEXP categ_from_majSEXP, SEXP take_midSEXP, SEXP max_depthSEXP, SEXP max_perc_outliersSEXP, SEXP min_size_numericSEXP, SEXP min_size_categSEXP, SEXP min_gainSEXP, SEXP follow_allSEXP, SEXP gain_as_pctSEXP, SEXP z_normSEXP, SEXP z_outlierSEXP, SEXP return_outliersSEXP, SEXP cat_levelsSEXP, SEXP ord_levelsSEXP, SEXP colnames_numSEXP, SEXP colnames_catSEXP, SEXP colnames_ordSEXP, SEXP min_dateSEXP, SEXP min_tsSEXP) {
         | 
| 33 37 | 
             
            BEGIN_RCPP
         | 
| 34 38 | 
             
                Rcpp::RObject rcpp_result_gen;
         | 
| 35 | 
            -
                Rcpp::RNGScope rcpp_rngScope_gen;
         | 
| 36 39 | 
             
                Rcpp::traits::input_parameter< Rcpp::NumericVector >::type arr_num(arr_numSEXP);
         | 
| 37 40 | 
             
                Rcpp::traits::input_parameter< size_t >::type ncols_numeric(ncols_numericSEXP);
         | 
| 38 41 | 
             
                Rcpp::traits::input_parameter< Rcpp::IntegerVector >::type arr_cat(arr_catSEXP);
         | 
| @@ -75,7 +78,6 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads, Rcpp: | |
| 75 78 | 
             
            RcppExport SEXP _outliertree_predict_OutlierTree(SEXP ptr_modelSEXP, SEXP nrowsSEXP, SEXP nthreadsSEXP, SEXP arr_numSEXP, SEXP arr_catSEXP, SEXP arr_ordSEXP, SEXP cat_levelsSEXP, SEXP ord_levelsSEXP, SEXP colnames_numSEXP, SEXP colnames_catSEXP, SEXP colnames_ordSEXP, SEXP min_dateSEXP, SEXP min_tsSEXP) {
         | 
| 76 79 | 
             
            BEGIN_RCPP
         | 
| 77 80 | 
             
                Rcpp::RObject rcpp_result_gen;
         | 
| 78 | 
            -
                Rcpp::RNGScope rcpp_rngScope_gen;
         | 
| 79 81 | 
             
                Rcpp::traits::input_parameter< SEXP >::type ptr_model(ptr_modelSEXP);
         | 
| 80 82 | 
             
                Rcpp::traits::input_parameter< size_t >::type nrows(nrowsSEXP);
         | 
| 81 83 | 
             
                Rcpp::traits::input_parameter< int >::type nthreads(nthreadsSEXP);
         | 
| @@ -98,7 +100,6 @@ Rcpp::LogicalVector check_few_values(Rcpp::NumericVector arr_num, size_t nrows, | |
| 98 100 | 
             
            RcppExport SEXP _outliertree_check_few_values(SEXP arr_numSEXP, SEXP nrowsSEXP, SEXP ncolsSEXP, SEXP nthreadsSEXP) {
         | 
| 99 101 | 
             
            BEGIN_RCPP
         | 
| 100 102 | 
             
                Rcpp::RObject rcpp_result_gen;
         | 
| 101 | 
            -
                Rcpp::RNGScope rcpp_rngScope_gen;
         | 
| 102 103 | 
             
                Rcpp::traits::input_parameter< Rcpp::NumericVector >::type arr_num(arr_numSEXP);
         | 
| 103 104 | 
             
                Rcpp::traits::input_parameter< size_t >::type nrows(nrowsSEXP);
         | 
| 104 105 | 
             
                Rcpp::traits::input_parameter< size_t >::type ncols(ncolsSEXP);
         | 
| @@ -107,13 +108,23 @@ BEGIN_RCPP | |
| 107 108 | 
             
                return rcpp_result_gen;
         | 
| 108 109 | 
             
            END_RCPP
         | 
| 109 110 | 
             
            }
         | 
| 111 | 
            +
            /* C-callable entry point around R_has_openmp(): takes no arguments,
               calls R_has_openmp() (declared below, defined elsewhere) and returns
               its bool result converted through Rcpp::wrap. */
            // R_has_openmp
         | 
| 112 | 
            +
            bool R_has_openmp();
         | 
| 113 | 
            +
            RcppExport SEXP _outliertree_R_has_openmp() {
         | 
| 114 | 
            +
            BEGIN_RCPP
         | 
| 115 | 
            +
                Rcpp::RObject rcpp_result_gen;
         | 
| 116 | 
            +
                rcpp_result_gen = Rcpp::wrap(R_has_openmp());
         | 
| 117 | 
            +
                return rcpp_result_gen;
         | 
| 118 | 
            +
            END_RCPP
         | 
| 119 | 
            +
            }
         | 
| 110 120 |  | 
| 111 121 | 
             
            /* Table of the exported wrappers: each entry is {name string, function
               pointer cast to DL_FUNC, integer}. For the wrappers whose signatures
               are visible here the integer equals their number of SEXP parameters
               (e.g. 2 for deserialize, 0 for R_has_openmp). The list ends with a
               {NULL, NULL, 0} sentinel entry. */
            static const R_CallMethodDef CallEntries[] = {
         | 
| 112 | 
            -
                {"_outliertree_deserialize_OutlierTree", (DL_FUNC) &_outliertree_deserialize_OutlierTree,  | 
| 122 | 
            +
                {"_outliertree_deserialize_OutlierTree", (DL_FUNC) &_outliertree_deserialize_OutlierTree, 2},
         | 
| 113 123 | 
             
                {"_outliertree_check_null_ptr_model", (DL_FUNC) &_outliertree_check_null_ptr_model, 1},
         | 
| 114 124 | 
             
                {"_outliertree_fit_OutlierTree", (DL_FUNC) &_outliertree_fit_OutlierTree, 33},
         | 
| 115 125 | 
             
                {"_outliertree_predict_OutlierTree", (DL_FUNC) &_outliertree_predict_OutlierTree, 13},
         | 
| 116 126 | 
             
                {"_outliertree_check_few_values", (DL_FUNC) &_outliertree_check_few_values, 4},
         | 
| 127 | 
            +
                {"_outliertree_R_has_openmp", (DL_FUNC) &_outliertree_R_has_openmp, 0},
         | 
| 117 128 | 
             
                {NULL, NULL, 0}
         | 
| 118 129 | 
             
            };
         | 
| 119 130 |  | 
| @@ -1,5 +1,9 @@ | |
| 1 | 
            +
            #ifdef _FOR_R
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            #include <Rcpp.h>
         | 
| 4 | 
            +
            #include <Rcpp/unwindProtect.h>
         | 
| 2 5 | 
             
            // [[Rcpp::plugins(cpp11)]]
         | 
| 6 | 
            +
            // [[Rcpp::plugins(unwindProtect)]]
         | 
| 3 7 |  | 
| 4 8 | 
             
            /* This is to serialize the model objects */
         | 
| 5 9 | 
             
            // [[Rcpp::depends(Rcereal)]]
         | 
| @@ -7,14 +11,22 @@ | |
| 7 11 | 
             
            #include <cereal/types/vector.hpp>
         | 
| 8 12 | 
             
            #include <sstream>
         | 
| 9 13 | 
             
            #include <string>
         | 
| 14 | 
            +
            #include <limits>
         | 
| 10 15 |  | 
| 11 16 | 
             
            /* This is the package's header */
         | 
| 12 17 | 
             
            #include "outlier_tree.hpp"
         | 
| 13 18 |  | 
| 19 | 
            +
            /* Allocates an Rcpp::RawVector whose length is the size_t that 'data'
               points to, and returns it as a SEXP.
               The void* -> SEXP signature lets it be handed to Rcpp::unwindProtect,
               which is how serialize_OutlierTree calls it.
               Calls Rcpp::stop if the requested size does not fit in R_xlen_t. */
            SEXP alloc_RawVec(void *data)
         | 
| 20 | 
            +
            {
         | 
| 21 | 
            +
                size_t vec_size = *(size_t*)data;
         | 
| 22 | 
            +
                /* guard the narrowing cast to R_xlen_t done on the return line */
                if (vec_size > (size_t)std::numeric_limits<R_xlen_t>::max())
         | 
| 23 | 
            +
                    Rcpp::stop("Resulting model is too large for R to handle.");
         | 
| 24 | 
            +
                return Rcpp::RawVector((R_xlen_t)vec_size);
         | 
| 25 | 
            +
            }
         | 
| 26 | 
            +
             | 
| 14 27 | 
             
            /* for model serialization and re-usage in R */
         | 
| 15 28 | 
             
            /* https://stackoverflow.com/questions/18474292/how-to-handle-c-internal-data-structure-in-r-in-order-to-allow-save-load */
         | 
| 16 29 | 
             
            /* this extra comment below the link is a workaround for Rcpp issue 675 in GitHub, do not remove it */
         | 
| 17 | 
            -
            #include <Rinternals.h>
         | 
| 18 30 | 
             
            Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
         | 
| 19 31 | 
             
            {
         | 
| 20 32 | 
             
                std::stringstream ss;
         | 
| @@ -23,27 +35,60 @@ Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs) | |
| 23 35 | 
             
                    oarchive(*model_outputs);
         | 
| 24 36 | 
             
                }
         | 
| 25 37 | 
             
                ss.seekg(0, ss.end);
         | 
| 26 | 
            -
                 | 
| 38 | 
            +
                std::stringstream::pos_type vec_size = ss.tellg();
         | 
| 39 | 
            +
                if (vec_size <= 0) {
         | 
| 40 | 
            +
                    Rcpp::Rcerr << "Error: model is too big to serialize, resulting object will not be usable.\n" << std::endl;
         | 
| 41 | 
            +
                    return Rcpp::RawVector();
         | 
| 42 | 
            +
                }
         | 
| 43 | 
            +
                size_t vec_size_ = (size_t)vec_size;
         | 
| 44 | 
            +
                Rcpp::RawVector retval = Rcpp::unwindProtect(alloc_RawVec, (void*)&vec_size_);
         | 
| 45 | 
            +
                if (!retval.size())
         | 
| 46 | 
            +
                    return retval;
         | 
| 27 47 | 
             
                ss.seekg(0, ss.beg);
         | 
| 28 | 
            -
                ss.read(reinterpret_cast<char*>( | 
| 48 | 
            +
                ss.read(reinterpret_cast<char*>(RAW(retval)), retval.size());
         | 
| 29 49 | 
             
                return retval;
         | 
| 30 50 | 
             
            }
         | 
| 31 51 |  | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 52 | 
            +
            /* Wraps 'model_ptr' (treated as a ModelOutputs*) in an
               Rcpp::XPtr<ModelOutputs> and returns it as a SEXP.
               NOTE(review): the second constructor argument 'true' presumably asks
               Rcpp to register a delete finalizer for the pointer - confirm against
               the Rcpp::XPtr documentation. */
            SEXP safe_XPtr(void *model_ptr)
         | 
| 53 | 
            +
            {
         | 
| 54 | 
            +
                return Rcpp::XPtr<ModelOutputs>((ModelOutputs*)model_ptr, true);
         | 
| 55 | 
            +
            }
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            /* Cleanup callback for an external pointer holding a ModelOutputs:
               fetches the stored address, deletes the object, then clears the
               external pointer. deserialize_OutlierTree registers it through
               R_RegisterCFinalizerEx. */
            void R_delete_model(SEXP R_ptr)
         | 
| 58 | 
            +
            {
         | 
| 59 | 
            +
                ModelOutputs *model = static_cast<ModelOutputs*>(R_ExternalPtrAddr(R_ptr));
         | 
| 60 | 
            +
                delete model;
         | 
| 61 | 
            +
                /* reset the stored address so the freed object is no longer reachable through R_ptr */
                R_ClearExternalPtr(R_ptr);
         | 
| 62 | 
            +
            }
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            /* Rebuilds a ModelOutputs object from the raw bytes in 'src' using a
               cereal binary input archive, and stores its address in the external
               pointer 'ptr_obj' passed in by the caller.
               In the added lines the function returns R_NilValue; the result is
               delivered through 'ptr_obj', for which R_delete_model is registered
               as finalizer. */
            // [[Rcpp::export(rng = false)]]
         | 
| 65 | 
            +
            SEXP deserialize_OutlierTree(Rcpp::RawVector src, SEXP ptr_obj)
         | 
| 34 66 | 
             
            {
         | 
| 35 67 | 
             
                std::stringstream ss;
         | 
| 36 | 
            -
                ss.write(reinterpret_cast<char*>( | 
| 68 | 
            +
                /* copy the serialized bytes into the stream, then rewind it for reading */
                ss.write(reinterpret_cast<char*>(RAW(src)), src.size());
         | 
| 37 69 | 
             
                ss.seekg(0, ss.beg);
         | 
| 38 70 | 
             
                std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
         | 
| 39 71 | 
             
                {
         | 
| 40 72 | 
             
                    cereal::BinaryInputArchive iarchive(ss);
         | 
| 41 73 | 
             
                    iarchive(*model_outputs);
         | 
| 42 74 | 
             
                }
         | 
| 43 | 
            -
                 | 
| 75 | 
            +
                /* store the address in ptr_obj and register the finalizer first;
                   the unique_ptr gives up ownership only after both calls returned */
                R_SetExternalPtrAddr(ptr_obj, model_outputs.get());
         | 
| 76 | 
            +
                R_RegisterCFinalizerEx(ptr_obj, R_delete_model, TRUE);
         | 
| 77 | 
            +
                model_outputs.release();
         | 
| 78 | 
            +
                return R_NilValue;
         | 
| 44 79 | 
             
            }
         | 
| 45 80 |  | 
| 46 | 
            -
             | 
| 81 | 
            +
            /* Reads the int that 'x' points to and returns it converted with
               Rcpp::wrap. Uses a void* -> SEXP signature, the same shape as
               alloc_RawVec, which is passed to Rcpp::unwindProtect. */
            SEXP safe_int(void *x)
         | 
| 82 | 
            +
            {
         | 
| 83 | 
            +
                return Rcpp::wrap(*(int*)x);
         | 
| 84 | 
            +
            }
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            /* Reads the bool that 'x' points to and returns it converted with
               Rcpp::wrap. Uses a void* -> SEXP signature, the same shape as
               alloc_RawVec, which is passed to Rcpp::unwindProtect. */
            SEXP safe_bool(void *x)
         | 
| 87 | 
            +
            {
         | 
| 88 | 
            +
                return Rcpp::wrap(*(bool*)x);
         | 
| 89 | 
            +
            }
         | 
| 90 | 
            +
             | 
| 91 | 
            +
            // [[Rcpp::export(rng = false)]]
         | 
| 47 92 | 
             
            Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
         | 
| 48 93 | 
             
            {
         | 
| 49 94 | 
             
                return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
         | 
| @@ -59,12 +104,22 @@ double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_ | |
| 59 104 | 
             
                return x_C.data();
         | 
| 60 105 | 
             
            }
         | 
| 61 106 |  | 
| 107 | 
            +
            /* Overload writing into an Rcpp::NumericVector: assigns to 'x_C' a new
               vector built from the 'n' doubles starting at 'x_R', then overwrites
               with NAN every position whose source value satisfies isnan(),
               Rcpp::NumericVector::is_na() or Rcpp::traits::is_nan<REALSXP>().
               Returns REAL(x_C), i.e. a pointer into the new vector, not into x_R.
               'nthreads' is only used in the OpenMP num_threads clause. */
            double* set_R_nan_as_C_nan(double *restrict x_R, Rcpp::NumericVector &x_C, size_t n, int nthreads)
         | 
| 108 | 
            +
            {
         | 
| 109 | 
            +
                x_C = Rcpp::NumericVector(x_R, x_R + n);
         | 
| 110 | 
            +
                #pragma omp parallel for schedule(static) num_threads(nthreads) shared(x_R, x_C, n)
         | 
| 111 | 
            +
                for (size_t_for i = 0; i < n; i++)
         | 
| 112 | 
            +
                    if (isnan(x_R[i]) || Rcpp::NumericVector::is_na(x_R[i]) || Rcpp::traits::is_nan<REALSXP>(x_R[i]))
         | 
| 113 | 
            +
                        x_C[i] = NAN;
         | 
| 114 | 
            +
                return REAL(x_C);
         | 
| 115 | 
            +
            }
         | 
| 116 | 
            +
             | 
| 62 117 |  | 
| 63 118 | 
             
            /* for predicting outliers */
         | 
| 64 119 | 
             
            Rcpp::List describe_outliers(ModelOutputs &model_outputs,
         | 
| 65 | 
            -
                                         double *arr_num,
         | 
| 66 | 
            -
                                         int    *arr_cat,
         | 
| 67 | 
            -
                                         int    *arr_ord,
         | 
| 120 | 
            +
                                         double *restrict arr_num,
         | 
| 121 | 
            +
                                         int    *restrict arr_cat,
         | 
| 122 | 
            +
                                         int    *restrict arr_ord,
         | 
| 68 123 | 
             
                                         Rcpp::ListOf<Rcpp::StringVector> cat_levels,
         | 
| 69 124 | 
             
                                         Rcpp::ListOf<Rcpp::StringVector> ord_levels,
         | 
| 70 125 | 
             
                                         Rcpp::StringVector colnames_num,
         | 
| @@ -345,6 +400,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 345 400 | 
             
                                    }
         | 
| 346 401 | 
             
                                    break;
         | 
| 347 402 | 
             
                                }
         | 
| 403 | 
            +
             | 
| 404 | 
            +
                                default:
         | 
| 405 | 
            +
                                {
         | 
| 406 | 
            +
                                    assert(0);
         | 
| 407 | 
            +
                                    break;
         | 
| 408 | 
            +
                                }
         | 
| 348 409 | 
             
                            }
         | 
| 349 410 |  | 
| 350 411 | 
             
                            /* add the comparison point */
         | 
| @@ -377,6 +438,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 377 438 | 
             
                                            cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
         | 
| 378 439 | 
             
                                            break;
         | 
| 379 440 | 
             
                                        }
         | 
| 441 | 
            +
             | 
| 442 | 
            +
                                        default:
         | 
| 443 | 
            +
                                        {
         | 
| 444 | 
            +
                                            unexpected_error();
         | 
| 445 | 
            +
                                        }
         | 
| 380 446 | 
             
                                    }
         | 
| 381 447 | 
             
                                    break;
         | 
| 382 448 | 
             
                                }
         | 
| @@ -492,6 +558,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 492 558 | 
             
                                    }
         | 
| 493 559 | 
             
                                    break;
         | 
| 494 560 | 
             
                                }
         | 
| 561 | 
            +
             | 
| 562 | 
            +
                                default:
         | 
| 563 | 
            +
                                {
         | 
| 564 | 
            +
                                    assert(0);
         | 
| 565 | 
            +
                                    break;
         | 
| 566 | 
            +
                                }
         | 
| 495 567 |  | 
| 496 568 | 
             
                            }
         | 
| 497 569 | 
             
                            lst_cond[row] = Rcpp::List::create(Rcpp::clone(cond_clust));
         | 
| @@ -528,6 +600,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 528 600 | 
             
                                            cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][curr_tree].col_num]);
         | 
| 529 601 | 
             
                                            break;
         | 
| 530 602 | 
             
                                        }
         | 
| 603 | 
            +
             | 
| 604 | 
            +
                                        default:
         | 
| 605 | 
            +
                                        {
         | 
| 606 | 
            +
                                            assert(0);
         | 
| 607 | 
            +
                                            break;
         | 
| 608 | 
            +
                                        }
         | 
| 531 609 | 
             
                                    }
         | 
| 532 610 |  | 
| 533 611 | 
             
                                    /* add conditions from tree */
         | 
| @@ -599,6 +677,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 599 677 | 
             
                                                    break;
         | 
| 600 678 | 
             
                                                }
         | 
| 601 679 |  | 
| 680 | 
            +
                                                default: {}
         | 
| 602 681 | 
             
                                            }
         | 
| 603 682 | 
             
                                            break;
         | 
| 604 683 | 
             
                                        }
         | 
| @@ -696,6 +775,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 696 775 | 
             
                                                    break;
         | 
| 697 776 | 
             
                                                }
         | 
| 698 777 |  | 
| 778 | 
            +
                                                default: {}
         | 
| 699 779 | 
             
                                            }
         | 
| 700 780 | 
             
                                            break;
         | 
| 701 781 | 
             
                                        }
         | 
| @@ -758,10 +838,16 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 758 838 | 
             
                                                    break;
         | 
| 759 839 | 
             
                                                }
         | 
| 760 840 |  | 
| 841 | 
            +
                                                default: {}
         | 
| 761 842 | 
             
                                            }
         | 
| 762 843 | 
             
                                            break;
         | 
| 763 844 | 
             
                                        }
         | 
| 764 845 |  | 
| 846 | 
            +
                                        default:
         | 
| 847 | 
            +
                                        {
         | 
| 848 | 
            +
                                            assert(0);
         | 
| 849 | 
            +
                                            break;
         | 
| 850 | 
            +
                                        }
         | 
| 765 851 | 
             
                                    }
         | 
| 766 852 | 
             
                                }
         | 
| 767 853 |  | 
| @@ -796,6 +882,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 796 882 | 
             
                                            cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][parent_tree].col_num]);
         | 
| 797 883 | 
             
                                            break;
         | 
| 798 884 | 
             
                                        }
         | 
| 885 | 
            +
             | 
| 886 | 
            +
                                        default:
         | 
| 887 | 
            +
                                        {
         | 
| 888 | 
            +
                                            assert(0);
         | 
| 889 | 
            +
                                            break;
         | 
| 890 | 
            +
                                        }
         | 
| 799 891 | 
             
                                    }
         | 
| 800 892 |  | 
| 801 893 |  | 
| @@ -835,6 +927,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 835 927 | 
             
                                                    cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
         | 
| 836 928 | 
             
                                                    break;
         | 
| 837 929 | 
             
                                                }
         | 
| 930 | 
            +
             | 
| 931 | 
            +
                                                default:
         | 
| 932 | 
            +
                                                {
         | 
| 933 | 
            +
                                                    unexpected_error();
         | 
| 934 | 
            +
                                                }
         | 
| 838 935 | 
             
                                            }
         | 
| 839 936 | 
             
                                            break;
         | 
| 840 937 | 
             
                                        }
         | 
| @@ -1011,6 +1108,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 1011 1108 | 
             
                                            break;
         | 
| 1012 1109 | 
             
                                        }
         | 
| 1013 1110 |  | 
| 1111 | 
            +
                                        default:
         | 
| 1112 | 
            +
                                        {
         | 
| 1113 | 
            +
                                            assert(0);
         | 
| 1114 | 
            +
                                            break;
         | 
| 1115 | 
            +
                                        }
         | 
| 1014 1116 | 
             
                                    }
         | 
| 1015 1117 |  | 
| 1016 1118 |  | 
| @@ -1038,6 +1140,37 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs, | |
| 1038 1140 | 
             
                return outp;
         | 
| 1039 1141 | 
             
            }
         | 
| 1040 1142 |  | 
| 1143 | 
            +
            /* Bundle of pointers to every argument of describe_outliers(), so that
               they can travel through the single void* parameter accepted by
               describe_outliers_wrapper(). All members are pointers to objects
               held elsewhere; the struct declares no constructor or destructor. */
            struct args_describe_outliers {
         | 
| 1144 | 
            +
                ModelOutputs *model_outputs;
         | 
| 1145 | 
            +
                double *arr_num;
         | 
| 1146 | 
            +
                int    *arr_cat;
         | 
| 1147 | 
            +
                int    *arr_ord;
         | 
| 1148 | 
            +
                Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
         | 
| 1149 | 
            +
                Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
         | 
| 1150 | 
            +
                Rcpp::StringVector *colnames_num;
         | 
| 1151 | 
            +
                Rcpp::StringVector *colnames_cat;
         | 
| 1152 | 
            +
                Rcpp::StringVector *colnames_ord;
         | 
| 1153 | 
            +
                Rcpp::NumericVector *min_date;
         | 
| 1154 | 
            +
                Rcpp::NumericVector *min_ts;
         | 
| 1155 | 
            +
            };
         | 
| 1156 | 
            +
             | 
| 1157 | 
            +
            /* Adapter with a void* -> SEXP signature around describe_outliers():
               casts 'args_' to args_describe_outliers*, dereferences each member
               and forwards them in declaration order. The three array members
               (arr_num, arr_cat, arr_ord) are passed on as pointers.
               NOTE(review): presumably meant to be passed to Rcpp::unwindProtect
               like alloc_RawVec - the call site is not visible in this hunk. */
            SEXP describe_outliers_wrapper(void *args_)
         | 
| 1158 | 
            +
            {
         | 
| 1159 | 
            +
                args_describe_outliers *args = (args_describe_outliers*)args_;
         | 
| 1160 | 
            +
                return describe_outliers(*(args->model_outputs),
         | 
| 1161 | 
            +
                                         args->arr_num,
         | 
| 1162 | 
            +
                                         args->arr_cat,
         | 
| 1163 | 
            +
                                         args->arr_ord,
         | 
| 1164 | 
            +
                                         *(args->cat_levels),
         | 
| 1165 | 
            +
                                         *(args->ord_levels),
         | 
| 1166 | 
            +
                                         *(args->colnames_num),
         | 
| 1167 | 
            +
                                         *(args->colnames_cat),
         | 
| 1168 | 
            +
                                         *(args->colnames_ord),
         | 
| 1169 | 
            +
                                         *(args->min_date),
         | 
| 1170 | 
            +
                                         *(args->min_ts));
         | 
| 1171 | 
            +
            }
         | 
| 1172 | 
            +
             | 
| 1173 | 
            +
             | 
| 1041 1174 | 
             
            /* for extracting info about flaggable outliers */
         | 
| 1042 1175 | 
             
            Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
         | 
| 1043 1176 | 
             
                                           Rcpp::ListOf<Rcpp::StringVector> cat_levels,
         | 
| @@ -1102,9 +1235,27 @@ Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs, | |
| 1102 1235 | 
             
                return outp;
         | 
| 1103 1236 | 
             
            }
         | 
| 1104 1237 |  | 
| 1238 | 
            +
            /* Bundle of pointers to the arguments that
               extract_outl_bounds_wrapper() forwards to extract_outl_bounds(), so
               they can travel through that wrapper's single void* parameter.
               All members are pointers to objects held elsewhere. */
            struct args_extract_outl_bounds {
         | 
| 1239 | 
            +
                ModelOutputs *model_outputs;
         | 
| 1240 | 
            +
                Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
         | 
| 1241 | 
            +
                Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
         | 
| 1242 | 
            +
                Rcpp::NumericVector *min_date;
         | 
| 1243 | 
            +
                Rcpp::NumericVector *min_ts;
         | 
| 1244 | 
            +
            };
         | 
| 1245 | 
            +
             | 
| 1246 | 
            +
            SEXP extract_outl_bounds_wrapper(void *args_)
         | 
| 1247 | 
            +
            {
         | 
| 1248 | 
            +
                args_extract_outl_bounds *args = (args_extract_outl_bounds*)args_;
         | 
| 1249 | 
            +
                return extract_outl_bounds(*(args->model_outputs),
         | 
| 1250 | 
            +
                                           *(args->cat_levels),
         | 
| 1251 | 
            +
                                           *(args->ord_levels),
         | 
| 1252 | 
            +
                                           *(args->min_date),
         | 
| 1253 | 
            +
                                           *(args->min_ts));
         | 
| 1254 | 
            +
            }
         | 
| 1255 | 
            +
             | 
| 1105 1256 |  | 
| 1106 1257 | 
             
            /* external functions for fitting the model and predicting outliers */
         | 
| 1107 | 
            -
            // [[Rcpp::export]]
         | 
| 1258 | 
            +
            // [[Rcpp::export(rng = false)]]
         | 
| 1108 1259 | 
             
            Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
         | 
| 1109 1260 | 
             
                                       Rcpp::IntegerVector arr_cat, size_t ncols_categ,   Rcpp::IntegerVector ncat,
         | 
| 1110 1261 | 
             
                                       Rcpp::IntegerVector arr_ord, size_t ncols_ord,     Rcpp::IntegerVector ncat_ord,
         | 
| @@ -1121,8 +1272,17 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric, | |
| 1121 1272 | 
             
                                       Rcpp::NumericVector min_date,
         | 
| 1122 1273 | 
             
                                       Rcpp::NumericVector min_ts)
         | 
| 1123 1274 | 
             
            {
         | 
| 1275 | 
            +
                Rcpp::List outp = Rcpp::List::create(
         | 
| 1276 | 
            +
                    Rcpp::_["ptr_model"] = R_NilValue,
         | 
| 1277 | 
            +
                    Rcpp::_["serialized_obj"] = R_NilValue,
         | 
| 1278 | 
            +
                    Rcpp::_["bounds"] = R_NilValue,
         | 
| 1279 | 
            +
                    Rcpp::_["outliers_info"] = R_NilValue,
         | 
| 1280 | 
            +
                    Rcpp::_["ntrees"] = R_NilValue,
         | 
| 1281 | 
            +
                    Rcpp::_["nclust"] = R_NilValue,
         | 
| 1282 | 
            +
                    Rcpp::_["found_outliers"] = R_NilValue
         | 
| 1283 | 
            +
                );
         | 
| 1284 | 
            +
             | 
| 1124 1285 | 
             
                bool found_outliers;
         | 
| 1125 | 
            -
                Rcpp::List outp;
         | 
| 1126 1286 | 
             
                size_t tot_cols = ncols_numeric + ncols_categ + ncols_ord;
         | 
| 1127 1287 | 
             
                std::vector<char> cols_ignore;
         | 
| 1128 1288 | 
             
                char *cols_ignore_ptr = NULL;
         | 
| @@ -1132,54 +1292,70 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric, | |
| 1132 1292 | 
             
                    cols_ignore_ptr = &cols_ignore[0];
         | 
| 1133 1293 | 
             
                }
         | 
| 1134 1294 | 
             
                std::vector<double> Xcpp;
         | 
| 1135 | 
            -
                double *arr_num_C = set_R_nan_as_C_nan( | 
| 1295 | 
            +
                double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
         | 
| 1136 1296 |  | 
| 1137 1297 | 
             
                std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
         | 
| 1298 | 
            +
                try {
         | 
| 1138 1299 | 
             
                found_outliers = fit_outliers_models(*model_outputs,
         | 
| 1139 1300 | 
             
                                                     arr_num_C, ncols_numeric,
         | 
| 1140 | 
            -
                                                      | 
| 1141 | 
            -
                                                      | 
| 1301 | 
            +
                                                     INTEGER(arr_cat), ncols_categ, INTEGER(ncat),
         | 
| 1302 | 
            +
                                                     INTEGER(arr_ord), ncols_ord,   INTEGER(ncat_ord),
         | 
| 1142 1303 | 
             
                                                     nrows, cols_ignore_ptr, nthreads,
         | 
| 1143 1304 | 
             
                                                     categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid,
         | 
| 1144 1305 | 
             
                                                     max_depth, max_perc_outliers, min_size_numeric, min_size_categ,
         | 
| 1145 1306 | 
             
                                                     min_gain, gain_as_pct, follow_all, z_norm, z_outlier);
         | 
| 1146 1307 |  | 
| 1147 | 
            -
                 | 
| 1148 | 
            -
             | 
| 1149 | 
            -
             | 
| 1150 | 
            -
             | 
| 1151 | 
            -
             | 
| 1152 | 
            -
             | 
| 1308 | 
            +
                args_extract_outl_bounds temp = {
         | 
| 1309 | 
            +
                    model_outputs.get(),
         | 
| 1310 | 
            +
                    &cat_levels,
         | 
| 1311 | 
            +
                    &ord_levels,
         | 
| 1312 | 
            +
                    &min_date,
         | 
| 1313 | 
            +
                    &min_ts
         | 
| 1314 | 
            +
                };
         | 
| 1315 | 
            +
                outp["bounds"] = Rcpp::unwindProtect(extract_outl_bounds_wrapper, (void*)&temp);
         | 
| 1153 1316 | 
             
                outp["serialized_obj"] = serialize_OutlierTree(model_outputs.get());
         | 
| 1317 | 
            +
                } catch(std::bad_alloc &e) {
         | 
| 1318 | 
            +
                    Rcpp::stop("Insufficient memory.\n");
         | 
| 1319 | 
            +
                }
         | 
| 1320 | 
            +
             | 
| 1321 | 
            +
                if (!Rf_xlength(outp["serialized_obj"]))
         | 
| 1322 | 
            +
                    return outp;
         | 
| 1154 1323 | 
             
                if (return_outliers) {
         | 
| 1155 | 
            -
                     | 
| 1156 | 
            -
             | 
| 1157 | 
            -
             | 
| 1158 | 
            -
             | 
| 1159 | 
            -
             | 
| 1160 | 
            -
             | 
| 1161 | 
            -
             | 
| 1162 | 
            -
             | 
| 1163 | 
            -
             | 
| 1164 | 
            -
             | 
| 1165 | 
            -
             | 
| 1324 | 
            +
                    args_describe_outliers temp = {
         | 
| 1325 | 
            +
                        model_outputs.get(),
         | 
| 1326 | 
            +
                        arr_num_C,
         | 
| 1327 | 
            +
                        INTEGER(arr_cat),
         | 
| 1328 | 
            +
                        INTEGER(arr_ord),
         | 
| 1329 | 
            +
                        &cat_levels,
         | 
| 1330 | 
            +
                        &ord_levels,
         | 
| 1331 | 
            +
                        &colnames_num,
         | 
| 1332 | 
            +
                        &colnames_cat,
         | 
| 1333 | 
            +
                        &colnames_ord,
         | 
| 1334 | 
            +
                        &min_date,
         | 
| 1335 | 
            +
                        &min_ts
         | 
| 1336 | 
            +
                    };
         | 
| 1337 | 
            +
                    outp["outliers_info"] = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
         | 
| 1166 1338 | 
             
                }
         | 
| 1339 | 
            +
                forget_row_outputs(*model_outputs);
         | 
| 1340 | 
            +
             | 
| 1167 1341 | 
             
                /* add number of trees and clusters */
         | 
| 1168 1342 | 
             
                size_t ntrees = 0, nclust = 0;
         | 
| 1169 1343 | 
             
                for (size_t col = 0; col < model_outputs->all_trees.size(); col++) {
         | 
| 1170 1344 | 
             
                	ntrees += model_outputs->all_trees[col].size();
         | 
| 1171 1345 | 
             
                	nclust += model_outputs->all_clusters[col].size();
         | 
| 1172 1346 | 
             
                }
         | 
| 1173 | 
            -
                 | 
| 1174 | 
            -
                 | 
| 1175 | 
            -
                outp[" | 
| 1347 | 
            +
                int ntrees_int = (int)ntrees;
         | 
| 1348 | 
            +
                int nclust_int = (int)nclust;
         | 
| 1349 | 
            +
                outp["ntrees"] = Rcpp::unwindProtect(safe_int, (void*)&ntrees_int);
         | 
| 1350 | 
            +
                outp["nclust"] = Rcpp::unwindProtect(safe_int, (void*)&nclust_int);
         | 
| 1351 | 
            +
                outp["found_outliers"] = Rcpp::unwindProtect(safe_bool, (void*)&found_outliers);
         | 
| 1176 1352 |  | 
| 1177 | 
            -
                 | 
| 1178 | 
            -
                 | 
| 1353 | 
            +
                outp["ptr_model"] = Rcpp::unwindProtect(safe_XPtr, model_outputs.get());
         | 
| 1354 | 
            +
                model_outputs.release();
         | 
| 1179 1355 | 
             
                return outp;
         | 
| 1180 1356 | 
             
            }
         | 
| 1181 1357 |  | 
| 1182 | 
            -
            // [[Rcpp::export]]
         | 
| 1358 | 
            +
            // [[Rcpp::export(rng = false)]]
         | 
| 1183 1359 | 
             
            Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
         | 
| 1184 1360 | 
             
                                           Rcpp::NumericVector arr_num, Rcpp::IntegerVector arr_cat, Rcpp::IntegerVector arr_ord,
         | 
| 1185 1361 | 
             
                                           Rcpp::ListOf<Rcpp::StringVector> cat_levels,
         | 
| @@ -1190,36 +1366,59 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads, | |
| 1190 1366 | 
             
                                           Rcpp::NumericVector min_date,
         | 
| 1191 1367 | 
             
                                           Rcpp::NumericVector min_ts)
         | 
| 1192 1368 | 
             
            {
         | 
| 1193 | 
            -
                 | 
| 1194 | 
            -
                double *arr_num_C = set_R_nan_as_C_nan( | 
| 1369 | 
            +
                Rcpp::NumericVector Xcpp;
         | 
| 1370 | 
            +
                double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
         | 
| 1195 1371 |  | 
| 1196 1372 | 
             
                ModelOutputs *model_outputs = static_cast<ModelOutputs*>(R_ExternalPtrAddr(ptr_model));
         | 
| 1197 | 
            -
                bool found_outliers = find_new_outliers( | 
| 1373 | 
            +
                bool found_outliers = find_new_outliers(arr_num_C, INTEGER(arr_cat), INTEGER(arr_ord),
         | 
| 1198 1374 | 
             
                                                        nrows, nthreads, *model_outputs);
         | 
| 1199 | 
            -
                 | 
| 1200 | 
            -
             | 
| 1201 | 
            -
             | 
| 1202 | 
            -
             | 
| 1203 | 
            -
             | 
| 1204 | 
            -
             | 
| 1205 | 
            -
             | 
| 1206 | 
            -
             | 
| 1207 | 
            -
             | 
| 1208 | 
            -
             | 
| 1209 | 
            -
             | 
| 1210 | 
            -
             | 
| 1375 | 
            +
                args_describe_outliers temp = {
         | 
| 1376 | 
            +
                    model_outputs,
         | 
| 1377 | 
            +
                    arr_num_C,
         | 
| 1378 | 
            +
                    INTEGER(arr_cat),
         | 
| 1379 | 
            +
                    INTEGER(arr_ord),
         | 
| 1380 | 
            +
                    &cat_levels,
         | 
| 1381 | 
            +
                    &ord_levels,
         | 
| 1382 | 
            +
                    &colnames_num,
         | 
| 1383 | 
            +
                    &colnames_cat,
         | 
| 1384 | 
            +
                    &colnames_ord,
         | 
| 1385 | 
            +
                    &min_date,
         | 
| 1386 | 
            +
                    &min_ts
         | 
| 1387 | 
            +
                };
         | 
| 1388 | 
            +
             | 
| 1389 | 
            +
                Rcpp::List outp;
         | 
| 1390 | 
            +
                try {
         | 
| 1391 | 
            +
                    outp = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
         | 
| 1392 | 
            +
                } catch(...) {
         | 
| 1393 | 
            +
                    forget_row_outputs(*model_outputs);
         | 
| 1394 | 
            +
                    throw;
         | 
| 1395 | 
            +
                }
         | 
| 1211 1396 | 
             
                forget_row_outputs(*model_outputs);
         | 
| 1397 | 
            +
                outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
         | 
| 1212 1398 | 
             
                return outp;
         | 
| 1213 1399 | 
             
            }
         | 
| 1214 1400 |  | 
| 1215 | 
            -
            // [[Rcpp::export]]
         | 
| 1401 | 
            +
            // [[Rcpp::export(rng = false)]]
         | 
| 1216 1402 | 
             
            Rcpp::LogicalVector check_few_values(Rcpp::NumericVector arr_num, size_t nrows, size_t ncols, int nthreads)
         | 
| 1217 1403 | 
             
            {
         | 
| 1218 | 
            -
                std::vector<char> too_few_vals(ncols, 0);
         | 
| 1219 | 
            -
                check_more_two_values(&arr_num[0], nrows, ncols, nthreads, too_few_vals.data());
         | 
| 1220 1404 | 
             
                Rcpp::LogicalVector outp(ncols);
         | 
| 1405 | 
            +
                std::vector<char> too_few_vals(ncols, 0);
         | 
| 1406 | 
            +
                check_more_two_values(REAL(arr_num), nrows, ncols, nthreads, too_few_vals.data());
         | 
| 1221 1407 | 
             
                for (size_t col = 0; col < ncols; col++) {
         | 
| 1222 1408 | 
             
                    outp[col] = (bool) too_few_vals[col];
         | 
| 1223 1409 | 
             
                }
         | 
| 1224 1410 | 
             
                return outp;
         | 
| 1225 1411 | 
             
            }
         | 
| 1412 | 
            +
             | 
| 1413 | 
            +
             | 
| 1414 | 
            +
            // [[Rcpp::export(rng = false)]]
         | 
| 1415 | 
            +
            bool R_has_openmp()
         | 
| 1416 | 
            +
            {
         | 
| 1417 | 
            +
                #ifdef _OPENMP
         | 
| 1418 | 
            +
                return true;
         | 
| 1419 | 
            +
                #else
         | 
| 1420 | 
            +
                return false;
         | 
| 1421 | 
            +
                #endif
         | 
| 1422 | 
            +
            }
         | 
| 1423 | 
            +
             | 
| 1424 | 
            +
            #endif /* _FOR_R */
         | 
| @@ -74,7 +74,7 @@ | |
| 74 74 | 
             
            */
         | 
| 75 75 | 
             
            void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
         | 
| 76 76 | 
             
                                         long double perc_threshold[], size_t buffer_ix[], long double buffer_perc[],
         | 
| 77 | 
            -
                                         double z_norm, char is_outlier[], bool *found_outliers, bool *new_is_outlier,
         | 
| 77 | 
            +
                                         double z_norm, signed char is_outlier[], bool *found_outliers, bool *new_is_outlier,
         | 
| 78 78 | 
             
                                         double *next_most_comm)
         | 
| 79 79 | 
             
            {
         | 
| 80 80 | 
             
                //TODO: must also establish bounds for new, unseen categories
         | 
| @@ -90,7 +90,7 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d | |
| 90 90 | 
             
                size_t size_tail = 0;
         | 
| 91 91 |  | 
| 92 92 | 
             
                /* reset the temporary arrays and fill them */
         | 
| 93 | 
            -
                memset(is_outlier, 0, ncateg * sizeof(char));
         | 
| 93 | 
            +
                memset(is_outlier, 0, ncateg * sizeof(signed char));
         | 
| 94 94 | 
             
                for (size_t cat = 0; cat < ncateg; cat++) {
         | 
| 95 95 | 
             
                    buffer_ix[cat] = cat;
         | 
| 96 96 | 
             
                    buffer_perc[cat] = (categ_counts[cat] > 0)? ((long double)categ_counts[cat] / tot_dbl) : 0;
         | 
| @@ -225,13 +225,13 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d | |
| 225 225 | 
             
            *        Category to which the majority of the observations belong.
         | 
| 226 226 | 
             
            */
         | 
| 227 227 | 
             
            void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
         | 
| 228 | 
            -
                                                long double prior_prob[], double z_outlier, char is_outlier[],
         | 
| 228 | 
            +
                                                long double prior_prob[], double z_outlier, signed char is_outlier[],
         | 
| 229 229 | 
             
                                                bool *found_outliers, bool *new_is_outlier, int *categ_maj)
         | 
| 230 230 | 
             
            {
         | 
| 231 231 | 
             
                /* initialize parameters as needed */
         | 
| 232 232 | 
             
                *found_outliers = false;
         | 
| 233 233 | 
             
                *new_is_outlier = false;
         | 
| 234 | 
            -
                memset(is_outlier, 0, ncateg * sizeof(char));
         | 
| 234 | 
            +
                memset(is_outlier, 0, ncateg * sizeof(signed char));
         | 
| 235 235 | 
             
                size_t max_outliers = (size_t) calculate_max_outliers((long double)tot, max_perc_outliers);
         | 
| 236 236 | 
             
                long double tot_dbl = (long double) (tot + 1);
         | 
| 237 237 | 
             
                size_t n_non_maj;
         | 
| @@ -283,7 +283,7 @@ void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t | |
| 283 283 | 
             
            *        Proportion of the least common non-outlier category.
         | 
| 284 284 | 
             
            */
         | 
| 285 285 | 
             
            bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_t tot,
         | 
| 286 | 
            -
                                                 char is_outlier[], double *next_most_comm)
         | 
| 286 | 
            +
                                                 signed char is_outlier[], double *next_most_comm)
         | 
| 287 287 | 
             
            {
         | 
| 288 288 | 
             
                /* if sample is too small, don't flag any as outliers */
         | 
| 289 289 | 
             
                if (tot < 1000) return false;
         | 
| @@ -296,7 +296,7 @@ bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_ | |
| 296 296 |  | 
| 297 297 | 
             
                /* look if there's any category meeting the first condition and none meeting the second one */
         | 
| 298 298 | 
             
                bool has_outlier_cat = false;
         | 
| 299 | 
            -
                memset(is_outlier, 0, sizeof(char) * ncateg);
         | 
| 299 | 
            +
                memset(is_outlier, 0, sizeof(signed char) * ncateg);
         | 
| 300 300 | 
             
                for (size_t cat = 0; cat < ncateg; cat++) {
         | 
| 301 301 | 
             
                    if (categ_counts[cat] > max_outliers && categ_counts[cat] < max_next_most_comm) {
         | 
| 302 302 | 
             
                        has_outlier_cat = false;
         |