outliertree 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,9 @@
1
+ #ifdef _FOR_R
2
+
1
3
  #include <Rcpp.h>
4
+ #include <Rcpp/unwindProtect.h>
2
5
  // [[Rcpp::plugins(cpp11)]]
6
+ // [[Rcpp::plugins(unwindProtect)]]
3
7
 
4
8
  /* This is to serialize the model objects */
5
9
  // [[Rcpp::depends(Rcereal)]]
@@ -7,14 +11,22 @@
7
11
  #include <cereal/types/vector.hpp>
8
12
  #include <sstream>
9
13
  #include <string>
14
+ #include <limits>
10
15
 
11
16
  /* This is the package's header */
12
17
  #include "outlier_tree.hpp"
13
18
 
19
+ SEXP alloc_RawVec(void *data)
20
+ {
21
+ size_t vec_size = *(size_t*)data;
22
+ if (vec_size > (size_t)std::numeric_limits<R_xlen_t>::max())
23
+ Rcpp::stop("Resulting model is too large for R to handle.");
24
+ return Rcpp::RawVector((R_xlen_t)vec_size);
25
+ }
26
+
14
27
  /* for model serialization and re-usage in R */
15
28
  /* https://stackoverflow.com/questions/18474292/how-to-handle-c-internal-data-structure-in-r-in-order-to-allow-save-load */
16
29
  /* this extra comment below the link is a workaround for Rcpp issue 675 in GitHub, do not remove it */
17
- #include <Rinternals.h>
18
30
  Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
19
31
  {
20
32
  std::stringstream ss;
@@ -23,27 +35,60 @@ Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
23
35
  oarchive(*model_outputs);
24
36
  }
25
37
  ss.seekg(0, ss.end);
26
- Rcpp::RawVector retval(ss.tellg());
38
+ std::stringstream::pos_type vec_size = ss.tellg();
39
+ if (vec_size <= 0) {
40
+ Rcpp::Rcerr << "Error: model is too big to serialize, resulting object will not be usable.\n" << std::endl;
41
+ return Rcpp::RawVector();
42
+ }
43
+ size_t vec_size_ = (size_t)vec_size;
44
+ Rcpp::RawVector retval = Rcpp::unwindProtect(alloc_RawVec, (void*)&vec_size_);
45
+ if (!retval.size())
46
+ return retval;
27
47
  ss.seekg(0, ss.beg);
28
- ss.read(reinterpret_cast<char*>(&retval[0]), retval.size());
48
+ ss.read(reinterpret_cast<char*>(RAW(retval)), retval.size());
29
49
  return retval;
30
50
  }
31
51
 
32
- // [[Rcpp::export]]
33
- SEXP deserialize_OutlierTree(Rcpp::RawVector src)
52
+ SEXP safe_XPtr(void *model_ptr)
53
+ {
54
+ return Rcpp::XPtr<ModelOutputs>((ModelOutputs*)model_ptr, true);
55
+ }
56
+
57
+ void R_delete_model(SEXP R_ptr)
58
+ {
59
+ ModelOutputs *model = static_cast<ModelOutputs*>(R_ExternalPtrAddr(R_ptr));
60
+ delete model;
61
+ R_ClearExternalPtr(R_ptr);
62
+ }
63
+
64
+ // [[Rcpp::export(rng = false)]]
65
+ SEXP deserialize_OutlierTree(Rcpp::RawVector src, SEXP ptr_obj)
34
66
  {
35
67
  std::stringstream ss;
36
- ss.write(reinterpret_cast<char*>(&src[0]), src.size());
68
+ ss.write(reinterpret_cast<char*>(RAW(src)), src.size());
37
69
  ss.seekg(0, ss.beg);
38
70
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
39
71
  {
40
72
  cereal::BinaryInputArchive iarchive(ss);
41
73
  iarchive(*model_outputs);
42
74
  }
43
- return Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
75
+ R_SetExternalPtrAddr(ptr_obj, model_outputs.get());
76
+ R_RegisterCFinalizerEx(ptr_obj, R_delete_model, TRUE);
77
+ model_outputs.release();
78
+ return R_NilValue;
44
79
  }
45
80
 
46
- // [[Rcpp::export]]
81
+ SEXP safe_int(void *x)
82
+ {
83
+ return Rcpp::wrap(*(int*)x);
84
+ }
85
+
86
+ SEXP safe_bool(void *x)
87
+ {
88
+ return Rcpp::wrap(*(bool*)x);
89
+ }
90
+
91
+ // [[Rcpp::export(rng = false)]]
47
92
  Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
48
93
  {
49
94
  return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
@@ -59,12 +104,22 @@ double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_
59
104
  return x_C.data();
60
105
  }
61
106
 
107
+ double* set_R_nan_as_C_nan(double *restrict x_R, Rcpp::NumericVector &x_C, size_t n, int nthreads)
108
+ {
109
+ x_C = Rcpp::NumericVector(x_R, x_R + n);
110
+ #pragma omp parallel for schedule(static) num_threads(nthreads) shared(x_R, x_C, n)
111
+ for (size_t_for i = 0; i < n; i++)
112
+ if (isnan(x_R[i]) || Rcpp::NumericVector::is_na(x_R[i]) || Rcpp::traits::is_nan<REALSXP>(x_R[i]))
113
+ x_C[i] = NAN;
114
+ return REAL(x_C);
115
+ }
116
+
62
117
 
63
118
  /* for predicting outliers */
64
119
  Rcpp::List describe_outliers(ModelOutputs &model_outputs,
65
- double *arr_num,
66
- int *arr_cat,
67
- int *arr_ord,
120
+ double *restrict arr_num,
121
+ int *restrict arr_cat,
122
+ int *restrict arr_ord,
68
123
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
69
124
  Rcpp::ListOf<Rcpp::StringVector> ord_levels,
70
125
  Rcpp::StringVector colnames_num,
@@ -345,6 +400,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
345
400
  }
346
401
  break;
347
402
  }
403
+
404
+ default:
405
+ {
406
+ assert(0);
407
+ break;
408
+ }
348
409
  }
349
410
 
350
411
  /* add the comparison point */
@@ -377,6 +438,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
377
438
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
378
439
  break;
379
440
  }
441
+
442
+ default:
443
+ {
444
+ unexpected_error();
445
+ }
380
446
  }
381
447
  break;
382
448
  }
@@ -492,6 +558,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
492
558
  }
493
559
  break;
494
560
  }
561
+
562
+ default:
563
+ {
564
+ assert(0);
565
+ break;
566
+ }
495
567
 
496
568
  }
497
569
  lst_cond[row] = Rcpp::List::create(Rcpp::clone(cond_clust));
@@ -528,6 +600,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
528
600
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][curr_tree].col_num]);
529
601
  break;
530
602
  }
603
+
604
+ default:
605
+ {
606
+ assert(0);
607
+ break;
608
+ }
531
609
  }
532
610
 
533
611
  /* add conditions from tree */
@@ -599,6 +677,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
599
677
  break;
600
678
  }
601
679
 
680
+ default: {}
602
681
  }
603
682
  break;
604
683
  }
@@ -696,6 +775,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
696
775
  break;
697
776
  }
698
777
 
778
+ default: {}
699
779
  }
700
780
  break;
701
781
  }
@@ -758,10 +838,16 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
758
838
  break;
759
839
  }
760
840
 
841
+ default: {}
761
842
  }
762
843
  break;
763
844
  }
764
845
 
846
+ default:
847
+ {
848
+ assert(0);
849
+ break;
850
+ }
765
851
  }
766
852
  }
767
853
 
@@ -796,6 +882,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
796
882
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][parent_tree].col_num]);
797
883
  break;
798
884
  }
885
+
886
+ default:
887
+ {
888
+ assert(0);
889
+ break;
890
+ }
799
891
  }
800
892
 
801
893
 
@@ -835,6 +927,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
835
927
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
836
928
  break;
837
929
  }
930
+
931
+ default:
932
+ {
933
+ unexpected_error();
934
+ }
838
935
  }
839
936
  break;
840
937
  }
@@ -1011,6 +1108,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1011
1108
  break;
1012
1109
  }
1013
1110
 
1111
+ default:
1112
+ {
1113
+ assert(0);
1114
+ break;
1115
+ }
1014
1116
  }
1015
1117
 
1016
1118
 
@@ -1038,6 +1140,37 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1038
1140
  return outp;
1039
1141
  }
1040
1142
 
1143
+ struct args_describe_outliers {
1144
+ ModelOutputs *model_outputs;
1145
+ double *arr_num;
1146
+ int *arr_cat;
1147
+ int *arr_ord;
1148
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1149
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1150
+ Rcpp::StringVector *colnames_num;
1151
+ Rcpp::StringVector *colnames_cat;
1152
+ Rcpp::StringVector *colnames_ord;
1153
+ Rcpp::NumericVector *min_date;
1154
+ Rcpp::NumericVector *min_ts;
1155
+ };
1156
+
1157
+ SEXP describe_outliers_wrapper(void *args_)
1158
+ {
1159
+ args_describe_outliers *args = (args_describe_outliers*)args_;
1160
+ return describe_outliers(*(args->model_outputs),
1161
+ args->arr_num,
1162
+ args->arr_cat,
1163
+ args->arr_ord,
1164
+ *(args->cat_levels),
1165
+ *(args->ord_levels),
1166
+ *(args->colnames_num),
1167
+ *(args->colnames_cat),
1168
+ *(args->colnames_ord),
1169
+ *(args->min_date),
1170
+ *(args->min_ts));
1171
+ }
1172
+
1173
+
1041
1174
  /* for extracting info about flaggable outliers */
1042
1175
  Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1043
1176
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1102,9 +1235,27 @@ Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1102
1235
  return outp;
1103
1236
  }
1104
1237
 
1238
+ struct args_extract_outl_bounds {
1239
+ ModelOutputs *model_outputs;
1240
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1241
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1242
+ Rcpp::NumericVector *min_date;
1243
+ Rcpp::NumericVector *min_ts;
1244
+ };
1245
+
1246
+ SEXP extract_outl_bounds_wrapper(void *args_)
1247
+ {
1248
+ args_extract_outl_bounds *args = (args_extract_outl_bounds*)args_;
1249
+ return extract_outl_bounds(*(args->model_outputs),
1250
+ *(args->cat_levels),
1251
+ *(args->ord_levels),
1252
+ *(args->min_date),
1253
+ *(args->min_ts));
1254
+ }
1255
+
1105
1256
 
1106
1257
  /* external functions for fitting the model and predicting outliers */
1107
- // [[Rcpp::export]]
1258
+ // [[Rcpp::export(rng = false)]]
1108
1259
  Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1109
1260
  Rcpp::IntegerVector arr_cat, size_t ncols_categ, Rcpp::IntegerVector ncat,
1110
1261
  Rcpp::IntegerVector arr_ord, size_t ncols_ord, Rcpp::IntegerVector ncat_ord,
@@ -1121,8 +1272,17 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1121
1272
  Rcpp::NumericVector min_date,
1122
1273
  Rcpp::NumericVector min_ts)
1123
1274
  {
1275
+ Rcpp::List outp = Rcpp::List::create(
1276
+ Rcpp::_["ptr_model"] = R_NilValue,
1277
+ Rcpp::_["serialized_obj"] = R_NilValue,
1278
+ Rcpp::_["bounds"] = R_NilValue,
1279
+ Rcpp::_["outliers_info"] = R_NilValue,
1280
+ Rcpp::_["ntrees"] = R_NilValue,
1281
+ Rcpp::_["nclust"] = R_NilValue,
1282
+ Rcpp::_["found_outliers"] = R_NilValue
1283
+ );
1284
+
1124
1285
  bool found_outliers;
1125
- Rcpp::List outp;
1126
1286
  size_t tot_cols = ncols_numeric + ncols_categ + ncols_ord;
1127
1287
  std::vector<char> cols_ignore;
1128
1288
  char *cols_ignore_ptr = NULL;
@@ -1132,54 +1292,70 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1132
1292
  cols_ignore_ptr = &cols_ignore[0];
1133
1293
  }
1134
1294
  std::vector<double> Xcpp;
1135
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1295
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1136
1296
 
1137
1297
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
1298
+ try {
1138
1299
  found_outliers = fit_outliers_models(*model_outputs,
1139
1300
  arr_num_C, ncols_numeric,
1140
- &arr_cat[0], ncols_categ, &ncat[0],
1141
- &arr_ord[0], ncols_ord, &ncat_ord[0],
1301
+ INTEGER(arr_cat), ncols_categ, INTEGER(ncat),
1302
+ INTEGER(arr_ord), ncols_ord, INTEGER(ncat_ord),
1142
1303
  nrows, cols_ignore_ptr, nthreads,
1143
1304
  categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid,
1144
1305
  max_depth, max_perc_outliers, min_size_numeric, min_size_categ,
1145
1306
  min_gain, gain_as_pct, follow_all, z_norm, z_outlier);
1146
1307
 
1147
- outp["bounds"] = extract_outl_bounds(*model_outputs,
1148
- cat_levels,
1149
- ord_levels,
1150
- min_date,
1151
- min_ts);
1152
-
1308
+ args_extract_outl_bounds temp = {
1309
+ model_outputs.get(),
1310
+ &cat_levels,
1311
+ &ord_levels,
1312
+ &min_date,
1313
+ &min_ts
1314
+ };
1315
+ outp["bounds"] = Rcpp::unwindProtect(extract_outl_bounds_wrapper, (void*)&temp);
1153
1316
  outp["serialized_obj"] = serialize_OutlierTree(model_outputs.get());
1317
+ } catch(std::bad_alloc &e) {
1318
+ Rcpp::stop("Insufficient memory.\n");
1319
+ }
1320
+
1321
+ if (!Rf_xlength(outp["serialized_obj"]))
1322
+ return outp;
1154
1323
  if (return_outliers) {
1155
- outp["outliers_info"] = describe_outliers(*model_outputs,
1156
- arr_num_C,
1157
- &arr_cat[0],
1158
- &arr_ord[0],
1159
- cat_levels,
1160
- ord_levels,
1161
- colnames_num,
1162
- colnames_cat,
1163
- colnames_ord,
1164
- min_date,
1165
- min_ts);
1324
+ args_describe_outliers temp = {
1325
+ model_outputs.get(),
1326
+ arr_num_C,
1327
+ INTEGER(arr_cat),
1328
+ INTEGER(arr_ord),
1329
+ &cat_levels,
1330
+ &ord_levels,
1331
+ &colnames_num,
1332
+ &colnames_cat,
1333
+ &colnames_ord,
1334
+ &min_date,
1335
+ &min_ts
1336
+ };
1337
+ outp["outliers_info"] = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1166
1338
  }
1339
+ forget_row_outputs(*model_outputs);
1340
+
1167
1341
  /* add number of trees and clusters */
1168
1342
  size_t ntrees = 0, nclust = 0;
1169
1343
  for (size_t col = 0; col < model_outputs->all_trees.size(); col++) {
1170
1344
  ntrees += model_outputs->all_trees[col].size();
1171
1345
  nclust += model_outputs->all_clusters[col].size();
1172
1346
  }
1173
- outp["ntrees"] = Rcpp::wrap((int) ntrees);
1174
- outp["nclust"] = Rcpp::wrap((int) nclust);
1175
- outp["found_outliers"] = Rcpp::wrap(found_outliers);
1347
+ int ntrees_int = (int)ntrees;
1348
+ int nclust_int = (int)nclust;
1349
+ outp["ntrees"] = Rcpp::unwindProtect(safe_int, (void*)&ntrees_int);
1350
+ outp["nclust"] = Rcpp::unwindProtect(safe_int, (void*)&nclust_int);
1351
+ outp["found_outliers"] = Rcpp::unwindProtect(safe_bool, (void*)&found_outliers);
1176
1352
 
1177
- forget_row_outputs(*model_outputs);
1178
- outp["ptr_model"] = Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
1353
+ outp["ptr_model"] = Rcpp::unwindProtect(safe_XPtr, model_outputs.get());
1354
+ model_outputs.release();
1179
1355
  return outp;
1180
1356
  }
1181
1357
 
1182
- // [[Rcpp::export]]
1358
+ // [[Rcpp::export(rng = false)]]
1183
1359
  Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1184
1360
  Rcpp::NumericVector arr_num, Rcpp::IntegerVector arr_cat, Rcpp::IntegerVector arr_ord,
1185
1361
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1190,36 +1366,59 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1190
1366
  Rcpp::NumericVector min_date,
1191
1367
  Rcpp::NumericVector min_ts)
1192
1368
  {
1193
- std::vector<double> Xcpp;
1194
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1369
+ Rcpp::NumericVector Xcpp;
1370
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1195
1371
 
1196
1372
  ModelOutputs *model_outputs = static_cast<ModelOutputs*>(R_ExternalPtrAddr(ptr_model));
1197
- bool found_outliers = find_new_outliers(&arr_num[0], &arr_cat[0], &arr_ord[0],
1373
+ bool found_outliers = find_new_outliers(arr_num_C, INTEGER(arr_cat), INTEGER(arr_ord),
1198
1374
  nrows, nthreads, *model_outputs);
1199
- Rcpp::List outp = describe_outliers(*model_outputs,
1200
- arr_num_C,
1201
- &arr_cat[0],
1202
- &arr_ord[0],
1203
- cat_levels,
1204
- ord_levels,
1205
- colnames_num,
1206
- colnames_cat,
1207
- colnames_ord,
1208
- min_date,
1209
- min_ts);
1210
- outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1375
+ args_describe_outliers temp = {
1376
+ model_outputs,
1377
+ arr_num_C,
1378
+ INTEGER(arr_cat),
1379
+ INTEGER(arr_ord),
1380
+ &cat_levels,
1381
+ &ord_levels,
1382
+ &colnames_num,
1383
+ &colnames_cat,
1384
+ &colnames_ord,
1385
+ &min_date,
1386
+ &min_ts
1387
+ };
1388
+
1389
+ Rcpp::List outp;
1390
+ try {
1391
+ outp = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1392
+ } catch(...) {
1393
+ forget_row_outputs(*model_outputs);
1394
+ throw;
1395
+ }
1211
1396
  forget_row_outputs(*model_outputs);
1397
+ outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1212
1398
  return outp;
1213
1399
  }
1214
1400
 
1215
- // [[Rcpp::export]]
1401
+ // [[Rcpp::export(rng = false)]]
1216
1402
  Rcpp::LogicalVector check_few_values(Rcpp::NumericVector arr_num, size_t nrows, size_t ncols, int nthreads)
1217
1403
  {
1218
- std::vector<char> too_few_vals(ncols, 0);
1219
- check_more_two_values(&arr_num[0], nrows, ncols, nthreads, too_few_vals.data());
1220
1404
  Rcpp::LogicalVector outp(ncols);
1405
+ std::vector<char> too_few_vals(ncols, 0);
1406
+ check_more_two_values(REAL(arr_num), nrows, ncols, nthreads, too_few_vals.data());
1221
1407
  for (size_t col = 0; col < ncols; col++) {
1222
1408
  outp[col] = (bool) too_few_vals[col];
1223
1409
  }
1224
1410
  return outp;
1225
1411
  }
1412
+
1413
+
1414
/* Tells whether this file was compiled with OpenMP enabled, judged by
   whether the compiler defined the '_OPENMP' macro. */
// [[Rcpp::export(rng = false)]]
bool R_has_openmp()
{
#ifdef _OPENMP
    constexpr bool built_with_openmp = true;
#else
    constexpr bool built_with_openmp = false;
#endif
    return built_with_openmp;
}
1423
+
1424
+ #endif /* _FOR_R */
@@ -74,7 +74,7 @@
74
74
  */
75
75
  void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
76
76
  long double perc_threshold[], size_t buffer_ix[], long double buffer_perc[],
77
- double z_norm, char is_outlier[], bool *found_outliers, bool *new_is_outlier,
77
+ double z_norm, signed char is_outlier[], bool *found_outliers, bool *new_is_outlier,
78
78
  double *next_most_comm)
79
79
  {
80
80
  //TODO: must also establish bounds for new, unseen categories
@@ -90,7 +90,7 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
90
90
  size_t size_tail = 0;
91
91
 
92
92
  /* reset the temporary arrays and fill them */
93
- memset(is_outlier, 0, ncateg * sizeof(char));
93
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
94
94
  for (size_t cat = 0; cat < ncateg; cat++) {
95
95
  buffer_ix[cat] = cat;
96
96
  buffer_perc[cat] = (categ_counts[cat] > 0)? ((long double)categ_counts[cat] / tot_dbl) : 0;
@@ -225,13 +225,13 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
225
225
  * Category to which the majority of the observations belong.
226
226
  */
227
227
  void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
228
- long double prior_prob[], double z_outlier, char is_outlier[],
228
+ long double prior_prob[], double z_outlier, signed char is_outlier[],
229
229
  bool *found_outliers, bool *new_is_outlier, int *categ_maj)
230
230
  {
231
231
  /* initialize parameters as needed */
232
232
  *found_outliers = false;
233
233
  *new_is_outlier = false;
234
- memset(is_outlier, 0, ncateg * sizeof(char));
234
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
235
235
  size_t max_outliers = (size_t) calculate_max_outliers((long double)tot, max_perc_outliers);
236
236
  long double tot_dbl = (long double) (tot + 1);
237
237
  size_t n_non_maj;
@@ -283,7 +283,7 @@ void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t
283
283
  * Proportion of the least common non-outlier category.
284
284
  */
285
285
  bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_t tot,
286
- char is_outlier[], double *next_most_comm)
286
+ signed char is_outlier[], double *next_most_comm)
287
287
  {
288
288
  /* if sample is too small, don't flag any as outliers */
289
289
  if (tot < 1000) return false;
@@ -296,7 +296,7 @@ bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_
296
296
 
297
297
  /* look if there's any category meeting the first condition and none meeting the second one */
298
298
  bool has_outlier_cat = false;
299
- memset(is_outlier, 0, sizeof(char) * ncateg);
299
+ memset(is_outlier, 0, sizeof(signed char) * ncateg);
300
300
  for (size_t cat = 0; cat < ncateg; cat++) {
301
301
  if (categ_counts[cat] > max_outliers && categ_counts[cat] < max_next_most_comm) {
302
302
  has_outlier_cat = false;