outliertree 0.2.1 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,9 @@
1
+ #ifdef _FOR_R
2
+
1
3
  #include <Rcpp.h>
4
+ #include <Rcpp/unwindProtect.h>
2
5
  // [[Rcpp::plugins(cpp11)]]
6
+ // [[Rcpp::plugins(unwindProtect)]]
3
7
 
4
8
  /* This is to serialize the model objects */
5
9
  // [[Rcpp::depends(Rcereal)]]
@@ -7,14 +11,22 @@
7
11
  #include <cereal/types/vector.hpp>
8
12
  #include <sstream>
9
13
  #include <string>
14
+ #include <limits>
10
15
 
11
16
  /* This is the package's header */
12
17
  #include "outlier_tree.hpp"
13
18
 
19
+ SEXP alloc_RawVec(void *data)
20
+ {
21
+ size_t vec_size = *(size_t*)data;
22
+ if (vec_size > (size_t)std::numeric_limits<R_xlen_t>::max())
23
+ Rcpp::stop("Resulting model is too large for R to handle.");
24
+ return Rcpp::RawVector((R_xlen_t)vec_size);
25
+ }
26
+
14
27
  /* for model serialization and re-usage in R */
15
28
  /* https://stackoverflow.com/questions/18474292/how-to-handle-c-internal-data-structure-in-r-in-order-to-allow-save-load */
16
29
  /* this extra comment below the link is a workaround for Rcpp issue 675 in GitHub, do not remove it */
17
- #include <Rinternals.h>
18
30
  Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
19
31
  {
20
32
  std::stringstream ss;
@@ -23,27 +35,60 @@ Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
23
35
  oarchive(*model_outputs);
24
36
  }
25
37
  ss.seekg(0, ss.end);
26
- Rcpp::RawVector retval(ss.tellg());
38
+ std::stringstream::pos_type vec_size = ss.tellg();
39
+ if (vec_size <= 0) {
40
+ Rcpp::Rcerr << "Error: model is too big to serialize, resulting object will not be usable.\n" << std::endl;
41
+ return Rcpp::RawVector();
42
+ }
43
+ size_t vec_size_ = (size_t)vec_size;
44
+ Rcpp::RawVector retval = Rcpp::unwindProtect(alloc_RawVec, (void*)&vec_size_);
45
+ if (!retval.size())
46
+ return retval;
27
47
  ss.seekg(0, ss.beg);
28
- ss.read(reinterpret_cast<char*>(&retval[0]), retval.size());
48
+ ss.read(reinterpret_cast<char*>(RAW(retval)), retval.size());
29
49
  return retval;
30
50
  }
31
51
 
32
- // [[Rcpp::export]]
33
- SEXP deserialize_OutlierTree(Rcpp::RawVector src)
52
+ SEXP safe_XPtr(void *model_ptr)
53
+ {
54
+ return Rcpp::XPtr<ModelOutputs>((ModelOutputs*)model_ptr, true);
55
+ }
56
+
57
+ void R_delete_model(SEXP R_ptr)
58
+ {
59
+ ModelOutputs *model = static_cast<ModelOutputs*>(R_ExternalPtrAddr(R_ptr));
60
+ delete model;
61
+ R_ClearExternalPtr(R_ptr);
62
+ }
63
+
64
+ // [[Rcpp::export(rng = false)]]
65
+ SEXP deserialize_OutlierTree(Rcpp::RawVector src, SEXP ptr_obj)
34
66
  {
35
67
  std::stringstream ss;
36
- ss.write(reinterpret_cast<char*>(&src[0]), src.size());
68
+ ss.write(reinterpret_cast<char*>(RAW(src)), src.size());
37
69
  ss.seekg(0, ss.beg);
38
70
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
39
71
  {
40
72
  cereal::BinaryInputArchive iarchive(ss);
41
73
  iarchive(*model_outputs);
42
74
  }
43
- return Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
75
+ R_SetExternalPtrAddr(ptr_obj, model_outputs.get());
76
+ R_RegisterCFinalizerEx(ptr_obj, R_delete_model, TRUE);
77
+ model_outputs.release();
78
+ return R_NilValue;
44
79
  }
45
80
 
46
- // [[Rcpp::export]]
81
+ SEXP safe_int(void *x)
82
+ {
83
+ return Rcpp::wrap(*(int*)x);
84
+ }
85
+
86
+ SEXP safe_bool(void *x)
87
+ {
88
+ return Rcpp::wrap(*(bool*)x);
89
+ }
90
+
91
+ // [[Rcpp::export(rng = false)]]
47
92
  Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
48
93
  {
49
94
  return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
@@ -59,12 +104,22 @@ double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_
59
104
  return x_C.data();
60
105
  }
61
106
 
107
+ double* set_R_nan_as_C_nan(double *restrict x_R, Rcpp::NumericVector &x_C, size_t n, int nthreads)
108
+ {
109
+ x_C = Rcpp::NumericVector(x_R, x_R + n);
110
+ #pragma omp parallel for schedule(static) num_threads(nthreads) shared(x_R, x_C, n)
111
+ for (size_t_for i = 0; i < n; i++)
112
+ if (isnan(x_R[i]) || Rcpp::NumericVector::is_na(x_R[i]) || Rcpp::traits::is_nan<REALSXP>(x_R[i]))
113
+ x_C[i] = NAN;
114
+ return REAL(x_C);
115
+ }
116
+
62
117
 
63
118
  /* for predicting outliers */
64
119
  Rcpp::List describe_outliers(ModelOutputs &model_outputs,
65
- double *arr_num,
66
- int *arr_cat,
67
- int *arr_ord,
120
+ double *restrict arr_num,
121
+ int *restrict arr_cat,
122
+ int *restrict arr_ord,
68
123
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
69
124
  Rcpp::ListOf<Rcpp::StringVector> ord_levels,
70
125
  Rcpp::StringVector colnames_num,
@@ -345,6 +400,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
345
400
  }
346
401
  break;
347
402
  }
403
+
404
+ default:
405
+ {
406
+ assert(0);
407
+ break;
408
+ }
348
409
  }
349
410
 
350
411
  /* add the comparison point */
@@ -377,6 +438,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
377
438
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
378
439
  break;
379
440
  }
441
+
442
+ default:
443
+ {
444
+ unexpected_error();
445
+ }
380
446
  }
381
447
  break;
382
448
  }
@@ -492,6 +558,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
492
558
  }
493
559
  break;
494
560
  }
561
+
562
+ default:
563
+ {
564
+ assert(0);
565
+ break;
566
+ }
495
567
 
496
568
  }
497
569
  lst_cond[row] = Rcpp::List::create(Rcpp::clone(cond_clust));
@@ -528,6 +600,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
528
600
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][curr_tree].col_num]);
529
601
  break;
530
602
  }
603
+
604
+ default:
605
+ {
606
+ assert(0);
607
+ break;
608
+ }
531
609
  }
532
610
 
533
611
  /* add conditions from tree */
@@ -599,6 +677,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
599
677
  break;
600
678
  }
601
679
 
680
+ default: {}
602
681
  }
603
682
  break;
604
683
  }
@@ -696,6 +775,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
696
775
  break;
697
776
  }
698
777
 
778
+ default: {}
699
779
  }
700
780
  break;
701
781
  }
@@ -758,10 +838,16 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
758
838
  break;
759
839
  }
760
840
 
841
+ default: {}
761
842
  }
762
843
  break;
763
844
  }
764
845
 
846
+ default:
847
+ {
848
+ assert(0);
849
+ break;
850
+ }
765
851
  }
766
852
  }
767
853
 
@@ -796,6 +882,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
796
882
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][parent_tree].col_num]);
797
883
  break;
798
884
  }
885
+
886
+ default:
887
+ {
888
+ assert(0);
889
+ break;
890
+ }
799
891
  }
800
892
 
801
893
 
@@ -835,6 +927,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
835
927
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
836
928
  break;
837
929
  }
930
+
931
+ default:
932
+ {
933
+ unexpected_error();
934
+ }
838
935
  }
839
936
  break;
840
937
  }
@@ -1011,6 +1108,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1011
1108
  break;
1012
1109
  }
1013
1110
 
1111
+ default:
1112
+ {
1113
+ assert(0);
1114
+ break;
1115
+ }
1014
1116
  }
1015
1117
 
1016
1118
 
@@ -1038,6 +1140,37 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1038
1140
  return outp;
1039
1141
  }
1040
1142
 
1143
+ struct args_describe_outliers {
1144
+ ModelOutputs *model_outputs;
1145
+ double *arr_num;
1146
+ int *arr_cat;
1147
+ int *arr_ord;
1148
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1149
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1150
+ Rcpp::StringVector *colnames_num;
1151
+ Rcpp::StringVector *colnames_cat;
1152
+ Rcpp::StringVector *colnames_ord;
1153
+ Rcpp::NumericVector *min_date;
1154
+ Rcpp::NumericVector *min_ts;
1155
+ };
1156
+
1157
+ SEXP describe_outliers_wrapper(void *args_)
1158
+ {
1159
+ args_describe_outliers *args = (args_describe_outliers*)args_;
1160
+ return describe_outliers(*(args->model_outputs),
1161
+ args->arr_num,
1162
+ args->arr_cat,
1163
+ args->arr_ord,
1164
+ *(args->cat_levels),
1165
+ *(args->ord_levels),
1166
+ *(args->colnames_num),
1167
+ *(args->colnames_cat),
1168
+ *(args->colnames_ord),
1169
+ *(args->min_date),
1170
+ *(args->min_ts));
1171
+ }
1172
+
1173
+
1041
1174
  /* for extracting info about flaggable outliers */
1042
1175
  Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1043
1176
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1102,9 +1235,27 @@ Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1102
1235
  return outp;
1103
1236
  }
1104
1237
 
1238
+ struct args_extract_outl_bounds {
1239
+ ModelOutputs *model_outputs;
1240
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1241
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1242
+ Rcpp::NumericVector *min_date;
1243
+ Rcpp::NumericVector *min_ts;
1244
+ };
1245
+
1246
+ SEXP extract_outl_bounds_wrapper(void *args_)
1247
+ {
1248
+ args_extract_outl_bounds *args = (args_extract_outl_bounds*)args_;
1249
+ return extract_outl_bounds(*(args->model_outputs),
1250
+ *(args->cat_levels),
1251
+ *(args->ord_levels),
1252
+ *(args->min_date),
1253
+ *(args->min_ts));
1254
+ }
1255
+
1105
1256
 
1106
1257
  /* external functions for fitting the model and predicting outliers */
1107
- // [[Rcpp::export]]
1258
+ // [[Rcpp::export(rng = false)]]
1108
1259
  Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1109
1260
  Rcpp::IntegerVector arr_cat, size_t ncols_categ, Rcpp::IntegerVector ncat,
1110
1261
  Rcpp::IntegerVector arr_ord, size_t ncols_ord, Rcpp::IntegerVector ncat_ord,
@@ -1121,8 +1272,17 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1121
1272
  Rcpp::NumericVector min_date,
1122
1273
  Rcpp::NumericVector min_ts)
1123
1274
  {
1275
+ Rcpp::List outp = Rcpp::List::create(
1276
+ Rcpp::_["ptr_model"] = R_NilValue,
1277
+ Rcpp::_["serialized_obj"] = R_NilValue,
1278
+ Rcpp::_["bounds"] = R_NilValue,
1279
+ Rcpp::_["outliers_info"] = R_NilValue,
1280
+ Rcpp::_["ntrees"] = R_NilValue,
1281
+ Rcpp::_["nclust"] = R_NilValue,
1282
+ Rcpp::_["found_outliers"] = R_NilValue
1283
+ );
1284
+
1124
1285
  bool found_outliers;
1125
- Rcpp::List outp;
1126
1286
  size_t tot_cols = ncols_numeric + ncols_categ + ncols_ord;
1127
1287
  std::vector<char> cols_ignore;
1128
1288
  char *cols_ignore_ptr = NULL;
@@ -1132,54 +1292,70 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1132
1292
  cols_ignore_ptr = &cols_ignore[0];
1133
1293
  }
1134
1294
  std::vector<double> Xcpp;
1135
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1295
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1136
1296
 
1137
1297
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
1298
+ try {
1138
1299
  found_outliers = fit_outliers_models(*model_outputs,
1139
1300
  arr_num_C, ncols_numeric,
1140
- &arr_cat[0], ncols_categ, &ncat[0],
1141
- &arr_ord[0], ncols_ord, &ncat_ord[0],
1301
+ INTEGER(arr_cat), ncols_categ, INTEGER(ncat),
1302
+ INTEGER(arr_ord), ncols_ord, INTEGER(ncat_ord),
1142
1303
  nrows, cols_ignore_ptr, nthreads,
1143
1304
  categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid,
1144
1305
  max_depth, max_perc_outliers, min_size_numeric, min_size_categ,
1145
1306
  min_gain, gain_as_pct, follow_all, z_norm, z_outlier);
1146
1307
 
1147
- outp["bounds"] = extract_outl_bounds(*model_outputs,
1148
- cat_levels,
1149
- ord_levels,
1150
- min_date,
1151
- min_ts);
1152
-
1308
+ args_extract_outl_bounds temp = {
1309
+ model_outputs.get(),
1310
+ &cat_levels,
1311
+ &ord_levels,
1312
+ &min_date,
1313
+ &min_ts
1314
+ };
1315
+ outp["bounds"] = Rcpp::unwindProtect(extract_outl_bounds_wrapper, (void*)&temp);
1153
1316
  outp["serialized_obj"] = serialize_OutlierTree(model_outputs.get());
1317
+ } catch(std::bad_alloc &e) {
1318
+ Rcpp::stop("Insufficient memory.\n");
1319
+ }
1320
+
1321
+ if (!Rf_xlength(outp["serialized_obj"]))
1322
+ return outp;
1154
1323
  if (return_outliers) {
1155
- outp["outliers_info"] = describe_outliers(*model_outputs,
1156
- arr_num_C,
1157
- &arr_cat[0],
1158
- &arr_ord[0],
1159
- cat_levels,
1160
- ord_levels,
1161
- colnames_num,
1162
- colnames_cat,
1163
- colnames_ord,
1164
- min_date,
1165
- min_ts);
1324
+ args_describe_outliers temp = {
1325
+ model_outputs.get(),
1326
+ arr_num_C,
1327
+ INTEGER(arr_cat),
1328
+ INTEGER(arr_ord),
1329
+ &cat_levels,
1330
+ &ord_levels,
1331
+ &colnames_num,
1332
+ &colnames_cat,
1333
+ &colnames_ord,
1334
+ &min_date,
1335
+ &min_ts
1336
+ };
1337
+ outp["outliers_info"] = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1166
1338
  }
1339
+ forget_row_outputs(*model_outputs);
1340
+
1167
1341
  /* add number of trees and clusters */
1168
1342
  size_t ntrees = 0, nclust = 0;
1169
1343
  for (size_t col = 0; col < model_outputs->all_trees.size(); col++) {
1170
1344
  ntrees += model_outputs->all_trees[col].size();
1171
1345
  nclust += model_outputs->all_clusters[col].size();
1172
1346
  }
1173
- outp["ntrees"] = Rcpp::wrap((int) ntrees);
1174
- outp["nclust"] = Rcpp::wrap((int) nclust);
1175
- outp["found_outliers"] = Rcpp::wrap(found_outliers);
1347
+ int ntrees_int = (int)ntrees;
1348
+ int nclust_int = (int)nclust;
1349
+ outp["ntrees"] = Rcpp::unwindProtect(safe_int, (void*)&ntrees_int);
1350
+ outp["nclust"] = Rcpp::unwindProtect(safe_int, (void*)&nclust_int);
1351
+ outp["found_outliers"] = Rcpp::unwindProtect(safe_bool, (void*)&found_outliers);
1176
1352
 
1177
- forget_row_outputs(*model_outputs);
1178
- outp["ptr_model"] = Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
1353
+ outp["ptr_model"] = Rcpp::unwindProtect(safe_XPtr, model_outputs.get());
1354
+ model_outputs.release();
1179
1355
  return outp;
1180
1356
  }
1181
1357
 
1182
- // [[Rcpp::export]]
1358
+ // [[Rcpp::export(rng = false)]]
1183
1359
  Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1184
1360
  Rcpp::NumericVector arr_num, Rcpp::IntegerVector arr_cat, Rcpp::IntegerVector arr_ord,
1185
1361
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1190,36 +1366,59 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1190
1366
  Rcpp::NumericVector min_date,
1191
1367
  Rcpp::NumericVector min_ts)
1192
1368
  {
1193
- std::vector<double> Xcpp;
1194
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1369
+ Rcpp::NumericVector Xcpp;
1370
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1195
1371
 
1196
1372
  ModelOutputs *model_outputs = static_cast<ModelOutputs*>(R_ExternalPtrAddr(ptr_model));
1197
- bool found_outliers = find_new_outliers(&arr_num[0], &arr_cat[0], &arr_ord[0],
1373
+ bool found_outliers = find_new_outliers(arr_num_C, INTEGER(arr_cat), INTEGER(arr_ord),
1198
1374
  nrows, nthreads, *model_outputs);
1199
- Rcpp::List outp = describe_outliers(*model_outputs,
1200
- arr_num_C,
1201
- &arr_cat[0],
1202
- &arr_ord[0],
1203
- cat_levels,
1204
- ord_levels,
1205
- colnames_num,
1206
- colnames_cat,
1207
- colnames_ord,
1208
- min_date,
1209
- min_ts);
1210
- outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1375
+ args_describe_outliers temp = {
1376
+ model_outputs,
1377
+ arr_num_C,
1378
+ INTEGER(arr_cat),
1379
+ INTEGER(arr_ord),
1380
+ &cat_levels,
1381
+ &ord_levels,
1382
+ &colnames_num,
1383
+ &colnames_cat,
1384
+ &colnames_ord,
1385
+ &min_date,
1386
+ &min_ts
1387
+ };
1388
+
1389
+ Rcpp::List outp;
1390
+ try {
1391
+ outp = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1392
+ } catch(...) {
1393
+ forget_row_outputs(*model_outputs);
1394
+ throw;
1395
+ }
1211
1396
  forget_row_outputs(*model_outputs);
1397
+ outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1212
1398
  return outp;
1213
1399
  }
1214
1400
 
1215
- // [[Rcpp::export]]
1401
+ // [[Rcpp::export(rng = false)]]
1216
1402
  Rcpp::LogicalVector check_few_values(Rcpp::NumericVector arr_num, size_t nrows, size_t ncols, int nthreads)
1217
1403
  {
1218
- std::vector<char> too_few_vals(ncols, 0);
1219
- check_more_two_values(&arr_num[0], nrows, ncols, nthreads, too_few_vals.data());
1220
1404
  Rcpp::LogicalVector outp(ncols);
1405
+ std::vector<char> too_few_vals(ncols, 0);
1406
+ check_more_two_values(REAL(arr_num), nrows, ncols, nthreads, too_few_vals.data());
1221
1407
  for (size_t col = 0; col < ncols; col++) {
1222
1408
  outp[col] = (bool) too_few_vals[col];
1223
1409
  }
1224
1410
  return outp;
1225
1411
  }
1412
+
1413
+
1414
+ // [[Rcpp::export(rng = false)]]
1415
+ bool R_has_openmp()
1416
+ {
1417
+ #ifdef _OPENMP
1418
+ return true;
1419
+ #else
1420
+ return false;
1421
+ #endif
1422
+ }
1423
+
1424
+ #endif /* _FOR_R */
@@ -74,7 +74,7 @@
74
74
  */
75
75
  void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
76
76
  long double perc_threshold[], size_t buffer_ix[], long double buffer_perc[],
77
- double z_norm, char is_outlier[], bool *found_outliers, bool *new_is_outlier,
77
+ double z_norm, signed char is_outlier[], bool *found_outliers, bool *new_is_outlier,
78
78
  double *next_most_comm)
79
79
  {
80
80
  //TODO: must also establish bounds for new, unseen categories
@@ -90,7 +90,7 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
90
90
  size_t size_tail = 0;
91
91
 
92
92
  /* reset the temporary arrays and fill them */
93
- memset(is_outlier, 0, ncateg * sizeof(char));
93
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
94
94
  for (size_t cat = 0; cat < ncateg; cat++) {
95
95
  buffer_ix[cat] = cat;
96
96
  buffer_perc[cat] = (categ_counts[cat] > 0)? ((long double)categ_counts[cat] / tot_dbl) : 0;
@@ -225,13 +225,13 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
225
225
  * Category to which the majority of the observations belong.
226
226
  */
227
227
  void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
228
- long double prior_prob[], double z_outlier, char is_outlier[],
228
+ long double prior_prob[], double z_outlier, signed char is_outlier[],
229
229
  bool *found_outliers, bool *new_is_outlier, int *categ_maj)
230
230
  {
231
231
  /* initialize parameters as needed */
232
232
  *found_outliers = false;
233
233
  *new_is_outlier = false;
234
- memset(is_outlier, 0, ncateg * sizeof(char));
234
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
235
235
  size_t max_outliers = (size_t) calculate_max_outliers((long double)tot, max_perc_outliers);
236
236
  long double tot_dbl = (long double) (tot + 1);
237
237
  size_t n_non_maj;
@@ -283,7 +283,7 @@ void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t
283
283
  * Proportion of the least common non-outlier category.
284
284
  */
285
285
  bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_t tot,
286
- char is_outlier[], double *next_most_comm)
286
+ signed char is_outlier[], double *next_most_comm)
287
287
  {
288
288
  /* if sample is too small, don't flag any as outliers */
289
289
  if (tot < 1000) return false;
@@ -296,7 +296,7 @@ bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_
296
296
 
297
297
  /* look if there's any category meeting the first condition and none meeting the second one */
298
298
  bool has_outlier_cat = false;
299
- memset(is_outlier, 0, sizeof(char) * ncateg);
299
+ memset(is_outlier, 0, sizeof(signed char) * ncateg);
300
300
  for (size_t cat = 0; cat < ncateg; cat++) {
301
301
  if (categ_counts[cat] > max_outliers && categ_counts[cat] < max_next_most_comm) {
302
302
  has_outlier_cat = false;