outliertree 0.2.1 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,4 +1,8 @@
1
+ #ifdef _FOR_R
2
+
1
3
  #include <Rcpp.h>
4
+ #include <Rcpp/unwindProtect.h>
5
+ #include <R_ext/Altrep.h>
2
6
  // [[Rcpp::plugins(cpp11)]]
3
7
 
4
8
  /* This is to serialize the model objects */
@@ -7,15 +11,31 @@
7
11
  #include <cereal/types/vector.hpp>
8
12
  #include <sstream>
9
13
  #include <string>
14
+ #include <limits>
10
15
 
11
16
  /* This is the package's header */
12
17
  #include "outlier_tree.hpp"
13
18
 
19
+ void delete_model_from_R_ptr(SEXP R_ptr)
20
+ {
21
+ ModelOutputs *model = static_cast<ModelOutputs*>(R_ExternalPtrAddr(R_ptr));
22
+ delete model;
23
+ R_SetExternalPtrAddr(R_ptr, nullptr);
24
+ R_ClearExternalPtr(R_ptr);
25
+ }
26
+
27
+ SEXP alloc_RawVec(void *data)
28
+ {
29
+ size_t vec_size = *(size_t*)data;
30
+ if (vec_size > (size_t)std::numeric_limits<R_xlen_t>::max())
31
+ Rcpp::stop("Resulting model is too large for R to handle.");
32
+ return Rf_allocVector(RAWSXP, vec_size);
33
+ }
34
+
14
35
  /* for model serialization and re-usage in R */
15
36
  /* https://stackoverflow.com/questions/18474292/how-to-handle-c-internal-data-structure-in-r-in-order-to-allow-save-load */
16
37
  /* this extra comment below the link is a workaround for Rcpp issue 675 in GitHub, do not remove it */
17
- #include <Rinternals.h>
18
- Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
38
+ SEXP serialize_OutlierTree(ModelOutputs *model_outputs)
19
39
  {
20
40
  std::stringstream ss;
21
41
  {
@@ -23,30 +43,157 @@ Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
23
43
  oarchive(*model_outputs);
24
44
  }
25
45
  ss.seekg(0, ss.end);
26
- Rcpp::RawVector retval(ss.tellg());
46
+ std::stringstream::pos_type vec_size = ss.tellg();
47
+ if (vec_size <= 0) {
48
+ Rf_error("Error: model is too big to serialize, resulting object will not be usable.\n");
49
+ }
50
+ size_t vec_size_ = (size_t)vec_size;
51
+ SEXP retval = PROTECT(Rcpp::unwindProtect(alloc_RawVec, (void*)&vec_size_));
27
52
  ss.seekg(0, ss.beg);
28
- ss.read(reinterpret_cast<char*>(&retval[0]), retval.size());
53
+ ss.read(reinterpret_cast<char*>(RAW(retval)), vec_size_);
54
+ UNPROTECT(1);
29
55
  return retval;
30
56
  }
31
57
 
32
- // [[Rcpp::export]]
33
- SEXP deserialize_OutlierTree(Rcpp::RawVector src)
58
+ SEXP deserialize_OutlierTree(SEXP src, SEXP ptr_obj)
34
59
  {
35
60
  std::stringstream ss;
36
- ss.write(reinterpret_cast<char*>(&src[0]), src.size());
61
+ ss.write(reinterpret_cast<char*>(RAW(src)), Rf_xlength(src));
37
62
  ss.seekg(0, ss.beg);
38
63
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
39
64
  {
40
65
  cereal::BinaryInputArchive iarchive(ss);
41
66
  iarchive(*model_outputs);
42
67
  }
43
- return Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
68
+ R_SetExternalPtrAddr(ptr_obj, model_outputs.get());
69
+ R_RegisterCFinalizerEx(ptr_obj, delete_model_from_R_ptr, TRUE);
70
+ model_outputs.release();
71
+ return R_NilValue;
72
+ }
73
+
74
+ static R_altrep_class_t otree_altrepped_pointer_class;
75
+
76
+ R_xlen_t altrepped_pointer_length(SEXP obj)
77
+ {
78
+ return 1;
79
+ }
80
+
81
+ SEXP get_element_from_altrepped_ptr(SEXP R_altrepped_obj, R_xlen_t idx)
82
+ {
83
+ return R_altrep_data1(R_altrepped_obj);
84
+ }
85
+
86
+ Rboolean inspect_altrepped_pointer(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int))
87
+ {
88
+ Rprintf("Altrepped pointer [address:%p]\n", R_ExternalPtrAddr(R_altrep_data1(x)));
89
+ return TRUE;
90
+ }
91
+
92
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
93
+ {
94
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
95
+ SEXP R_ptr_class = PROTECT(Rf_mkString("otree_altrepped_handle"));
96
+ SEXP out = PROTECT(R_new_altrep(otree_altrepped_pointer_class, R_NilValue, R_NilValue));
97
+
98
+ if (!deep) {
99
+ R_set_altrep_data1(out, R_altrep_data1(altrepped_obj));
100
+ }
101
+
102
+ else {
103
+
104
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
105
+
106
+ try {
107
+ std::unique_ptr<ModelOutputs> new_obj(new ModelOutputs());
108
+ ModelOutputs *cpp_ptr = (ModelOutputs*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
109
+ *new_obj = *cpp_ptr;
110
+
111
+ R_SetExternalPtrAddr(R_ptr, new_obj.get());
112
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr, TRUE);
113
+ new_obj.release();
114
+ }
115
+
116
+ catch (const std::exception &ex) {
117
+ Rf_error("%s\n", ex.what());
118
+ }
119
+
120
+ R_set_altrep_data1(out, R_ptr);
121
+ UNPROTECT(1);
122
+ }
123
+
124
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
125
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_class);
126
+ UNPROTECT(3);
127
+ return out;
44
128
  }
45
129
 
46
- // [[Rcpp::export]]
47
- Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
130
+ SEXP serialize_altrepped_pointer(SEXP altrepped_obj)
48
131
  {
49
- return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
132
+ return serialize_OutlierTree((ModelOutputs*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj)));
133
+ }
134
+
135
+ SEXP deserialize_altrepped_pointer(SEXP cls, SEXP R_state)
136
+ {
137
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
138
+ SEXP R_ptr_class = PROTECT(Rf_mkString("otree_altrepped_handle"));
139
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
140
+ SEXP out = PROTECT(R_new_altrep(otree_altrepped_pointer_class, R_NilValue, R_NilValue));
141
+
142
+ try {
143
+ deserialize_OutlierTree(R_state, R_ptr);
144
+ }
145
+ catch (const std::exception &ex) {
146
+ Rf_error("%s\n", ex.what());
147
+ }
148
+
149
+ R_set_altrep_data1(out, R_ptr);
150
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
151
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
152
+
153
+ UNPROTECT(4);
154
+ return out;
155
+ }
156
+
157
+ SEXP get_altrepped_pointer(void *void_ptr)
158
+ {
159
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
160
+ SEXP R_ptr_class = PROTECT(Rf_mkString("otree_altrepped_handle"));
161
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
162
+ SEXP out = PROTECT(R_new_altrep(otree_altrepped_pointer_class, R_NilValue, R_NilValue));
163
+
164
+ std::unique_ptr<ModelOutputs> *ptr = (std::unique_ptr<ModelOutputs>*)void_ptr;
165
+ R_SetExternalPtrAddr(R_ptr, ptr->get());
166
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr, TRUE);
167
+ ptr->release();
168
+
169
+ R_set_altrep_data1(out, R_ptr);
170
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
171
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
172
+
173
+ UNPROTECT(4);
174
+ return out;
175
+ }
176
+
177
+ // [[Rcpp::init]]
178
+ void init_altrepped_class(DllInfo* dll)
179
+ {
180
+ otree_altrepped_pointer_class = R_make_altlist_class("otree_altrepped_pointer_class", "outliertree", dll);
181
+ R_set_altrep_Length_method(otree_altrepped_pointer_class, altrepped_pointer_length);
182
+ R_set_altrep_Inspect_method(otree_altrepped_pointer_class, inspect_altrepped_pointer);
183
+ R_set_altrep_Serialized_state_method(otree_altrepped_pointer_class, serialize_altrepped_pointer);
184
+ R_set_altrep_Unserialize_method(otree_altrepped_pointer_class, deserialize_altrepped_pointer);
185
+ R_set_altrep_Duplicate_method(otree_altrepped_pointer_class, duplicate_altrepped_pointer);
186
+ R_set_altlist_Elt_method(otree_altrepped_pointer_class, get_element_from_altrepped_ptr);
187
+ }
188
+
189
+ SEXP safe_int(void *x)
190
+ {
191
+ return Rf_ScalarInteger(*(int*)x);
192
+ }
193
+
194
+ SEXP safe_bool(void *x)
195
+ {
196
+ return Rf_ScalarLogical(*(bool*)x);
50
197
  }
51
198
 
52
199
  double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_t n, int nthreads)
@@ -59,12 +206,22 @@ double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_
59
206
  return x_C.data();
60
207
  }
61
208
 
209
+ double* set_R_nan_as_C_nan(double *restrict x_R, Rcpp::NumericVector &x_C, size_t n, int nthreads)
210
+ {
211
+ x_C = Rcpp::NumericVector(x_R, x_R + n);
212
+ #pragma omp parallel for schedule(static) num_threads(nthreads) shared(x_R, x_C, n)
213
+ for (size_t_for i = 0; i < n; i++)
214
+ if (isnan(x_R[i]) || Rcpp::NumericVector::is_na(x_R[i]) || Rcpp::traits::is_nan<REALSXP>(x_R[i]))
215
+ x_C[i] = NAN;
216
+ return REAL(x_C);
217
+ }
218
+
62
219
 
63
220
  /* for predicting outliers */
64
221
  Rcpp::List describe_outliers(ModelOutputs &model_outputs,
65
- double *arr_num,
66
- int *arr_cat,
67
- int *arr_ord,
222
+ double *restrict arr_num,
223
+ int *restrict arr_cat,
224
+ int *restrict arr_ord,
68
225
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
69
226
  Rcpp::ListOf<Rcpp::StringVector> ord_levels,
70
227
  Rcpp::StringVector colnames_num,
@@ -209,7 +366,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
209
366
  } else if (outl_col < (ncols_num + ncols_cat)) {
210
367
  if (outl_col < (ncols_num + ncols_cat_cat)) {
211
368
  tmp_bool = Rcpp::LogicalVector(model_outputs.all_clusters[outl_col][outl_clust].subset_common.size(), false);
212
- for (size_t cat = 0; cat < tmp_bool.size(); cat++) {
369
+ for (size_t cat = 0; cat < (size_t)tmp_bool.size(); cat++) {
213
370
  if (model_outputs.all_clusters[outl_col][outl_clust].subset_common[cat] == 0) {
214
371
  tmp_bool[cat] = true;
215
372
  }
@@ -253,7 +410,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
253
410
  }
254
411
  } else {
255
412
  tmp_bool = Rcpp::LogicalVector(model_outputs.all_clusters[outl_col][outl_clust].subset_common.size(), false);
256
- for (size_t cat = 0; cat < tmp_bool.size(); cat++) {
413
+ for (size_t cat = 0; cat < (size_t)tmp_bool.size(); cat++) {
257
414
  if (model_outputs.all_clusters[outl_col][outl_clust].subset_common[cat] == 0) {
258
415
  tmp_bool[cat] = true;
259
416
  }
@@ -345,6 +502,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
345
502
  }
346
503
  break;
347
504
  }
505
+
506
+ default:
507
+ {
508
+ assert(0);
509
+ break;
510
+ }
348
511
  }
349
512
 
350
513
  /* add the comparison point */
@@ -377,6 +540,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
377
540
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
378
541
  break;
379
542
  }
543
+
544
+ default:
545
+ {
546
+ unexpected_error();
547
+ }
380
548
  }
381
549
  break;
382
550
  }
@@ -492,6 +660,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
492
660
  }
493
661
  break;
494
662
  }
663
+
664
+ default:
665
+ {
666
+ assert(0);
667
+ break;
668
+ }
495
669
 
496
670
  }
497
671
  lst_cond[row] = Rcpp::List::create(Rcpp::clone(cond_clust));
@@ -528,6 +702,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
528
702
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][curr_tree].col_num]);
529
703
  break;
530
704
  }
705
+
706
+ default:
707
+ {
708
+ assert(0);
709
+ break;
710
+ }
531
711
  }
532
712
 
533
713
  /* add conditions from tree */
@@ -599,6 +779,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
599
779
  break;
600
780
  }
601
781
 
782
+ default: {}
602
783
  }
603
784
  break;
604
785
  }
@@ -696,6 +877,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
696
877
  break;
697
878
  }
698
879
 
880
+ default: {}
699
881
  }
700
882
  break;
701
883
  }
@@ -758,10 +940,16 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
758
940
  break;
759
941
  }
760
942
 
943
+ default: {}
761
944
  }
762
945
  break;
763
946
  }
764
947
 
948
+ default:
949
+ {
950
+ assert(0);
951
+ break;
952
+ }
765
953
  }
766
954
  }
767
955
 
@@ -796,6 +984,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
796
984
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][parent_tree].col_num]);
797
985
  break;
798
986
  }
987
+
988
+ default:
989
+ {
990
+ assert(0);
991
+ break;
992
+ }
799
993
  }
800
994
 
801
995
 
@@ -835,6 +1029,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
835
1029
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
836
1030
  break;
837
1031
  }
1032
+
1033
+ default:
1034
+ {
1035
+ unexpected_error();
1036
+ }
838
1037
  }
839
1038
  break;
840
1039
  }
@@ -1011,6 +1210,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1011
1210
  break;
1012
1211
  }
1013
1212
 
1213
+ default:
1214
+ {
1215
+ assert(0);
1216
+ break;
1217
+ }
1014
1218
  }
1015
1219
 
1016
1220
 
@@ -1038,6 +1242,37 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1038
1242
  return outp;
1039
1243
  }
1040
1244
 
1245
+ struct args_describe_outliers {
1246
+ ModelOutputs *model_outputs;
1247
+ double *arr_num;
1248
+ int *arr_cat;
1249
+ int *arr_ord;
1250
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1251
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1252
+ Rcpp::StringVector *colnames_num;
1253
+ Rcpp::StringVector *colnames_cat;
1254
+ Rcpp::StringVector *colnames_ord;
1255
+ Rcpp::NumericVector *min_date;
1256
+ Rcpp::NumericVector *min_ts;
1257
+ };
1258
+
1259
+ SEXP describe_outliers_wrapper(void *args_)
1260
+ {
1261
+ args_describe_outliers *args = (args_describe_outliers*)args_;
1262
+ return describe_outliers(*(args->model_outputs),
1263
+ args->arr_num,
1264
+ args->arr_cat,
1265
+ args->arr_ord,
1266
+ *(args->cat_levels),
1267
+ *(args->ord_levels),
1268
+ *(args->colnames_num),
1269
+ *(args->colnames_cat),
1270
+ *(args->colnames_ord),
1271
+ *(args->min_date),
1272
+ *(args->min_ts));
1273
+ }
1274
+
1275
+
1041
1276
  /* for extracting info about flaggable outliers */
1042
1277
  Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1043
1278
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1102,9 +1337,27 @@ Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1102
1337
  return outp;
1103
1338
  }
1104
1339
 
1340
+ struct args_extract_outl_bounds {
1341
+ ModelOutputs *model_outputs;
1342
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1343
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1344
+ Rcpp::NumericVector *min_date;
1345
+ Rcpp::NumericVector *min_ts;
1346
+ };
1347
+
1348
+ SEXP extract_outl_bounds_wrapper(void *args_)
1349
+ {
1350
+ args_extract_outl_bounds *args = (args_extract_outl_bounds*)args_;
1351
+ return extract_outl_bounds(*(args->model_outputs),
1352
+ *(args->cat_levels),
1353
+ *(args->ord_levels),
1354
+ *(args->min_date),
1355
+ *(args->min_ts));
1356
+ }
1357
+
1105
1358
 
1106
1359
  /* external functions for fitting the model and predicting outliers */
1107
- // [[Rcpp::export]]
1360
+ // [[Rcpp::export(rng = false)]]
1108
1361
  Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1109
1362
  Rcpp::IntegerVector arr_cat, size_t ncols_categ, Rcpp::IntegerVector ncat,
1110
1363
  Rcpp::IntegerVector arr_ord, size_t ncols_ord, Rcpp::IntegerVector ncat_ord,
@@ -1121,8 +1374,16 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1121
1374
  Rcpp::NumericVector min_date,
1122
1375
  Rcpp::NumericVector min_ts)
1123
1376
  {
1377
+ Rcpp::List outp = Rcpp::List::create(
1378
+ Rcpp::_["ptr_model"] = R_NilValue,
1379
+ Rcpp::_["bounds"] = R_NilValue,
1380
+ Rcpp::_["outliers_info"] = R_NilValue,
1381
+ Rcpp::_["ntrees"] = R_NilValue,
1382
+ Rcpp::_["nclust"] = R_NilValue,
1383
+ Rcpp::_["found_outliers"] = R_NilValue
1384
+ );
1385
+
1124
1386
  bool found_outliers;
1125
- Rcpp::List outp;
1126
1387
  size_t tot_cols = ncols_numeric + ncols_categ + ncols_ord;
1127
1388
  std::vector<char> cols_ignore;
1128
1389
  char *cols_ignore_ptr = NULL;
@@ -1132,54 +1393,62 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1132
1393
  cols_ignore_ptr = &cols_ignore[0];
1133
1394
  }
1134
1395
  std::vector<double> Xcpp;
1135
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1396
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1136
1397
 
1137
1398
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
1138
1399
  found_outliers = fit_outliers_models(*model_outputs,
1139
1400
  arr_num_C, ncols_numeric,
1140
- &arr_cat[0], ncols_categ, &ncat[0],
1141
- &arr_ord[0], ncols_ord, &ncat_ord[0],
1401
+ INTEGER(arr_cat), ncols_categ, INTEGER(ncat),
1402
+ INTEGER(arr_ord), ncols_ord, INTEGER(ncat_ord),
1142
1403
  nrows, cols_ignore_ptr, nthreads,
1143
1404
  categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid,
1144
1405
  max_depth, max_perc_outliers, min_size_numeric, min_size_categ,
1145
1406
  min_gain, gain_as_pct, follow_all, z_norm, z_outlier);
1146
1407
 
1147
- outp["bounds"] = extract_outl_bounds(*model_outputs,
1148
- cat_levels,
1149
- ord_levels,
1150
- min_date,
1151
- min_ts);
1408
+ args_extract_outl_bounds temp = {
1409
+ model_outputs.get(),
1410
+ &cat_levels,
1411
+ &ord_levels,
1412
+ &min_date,
1413
+ &min_ts
1414
+ };
1415
+ outp["bounds"] = Rcpp::unwindProtect(extract_outl_bounds_wrapper, (void*)&temp);
1152
1416
 
1153
- outp["serialized_obj"] = serialize_OutlierTree(model_outputs.get());
1154
1417
  if (return_outliers) {
1155
- outp["outliers_info"] = describe_outliers(*model_outputs,
1156
- arr_num_C,
1157
- &arr_cat[0],
1158
- &arr_ord[0],
1159
- cat_levels,
1160
- ord_levels,
1161
- colnames_num,
1162
- colnames_cat,
1163
- colnames_ord,
1164
- min_date,
1165
- min_ts);
1418
+ args_describe_outliers temp = {
1419
+ model_outputs.get(),
1420
+ arr_num_C,
1421
+ INTEGER(arr_cat),
1422
+ INTEGER(arr_ord),
1423
+ &cat_levels,
1424
+ &ord_levels,
1425
+ &colnames_num,
1426
+ &colnames_cat,
1427
+ &colnames_ord,
1428
+ &min_date,
1429
+ &min_ts
1430
+ };
1431
+ outp["outliers_info"] = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1166
1432
  }
1433
+ forget_row_outputs(*model_outputs);
1434
+
1167
1435
  /* add number of trees and clusters */
1168
1436
  size_t ntrees = 0, nclust = 0;
1169
1437
  for (size_t col = 0; col < model_outputs->all_trees.size(); col++) {
1170
1438
  ntrees += model_outputs->all_trees[col].size();
1171
1439
  nclust += model_outputs->all_clusters[col].size();
1172
1440
  }
1173
- outp["ntrees"] = Rcpp::wrap((int) ntrees);
1174
- outp["nclust"] = Rcpp::wrap((int) nclust);
1175
- outp["found_outliers"] = Rcpp::wrap(found_outliers);
1441
+ int ntrees_int = (int)ntrees;
1442
+ int nclust_int = (int)nclust;
1443
+ outp["ntrees"] = Rcpp::unwindProtect(safe_int, (void*)&ntrees_int);
1444
+ outp["nclust"] = Rcpp::unwindProtect(safe_int, (void*)&nclust_int);
1445
+ outp["found_outliers"] = Rcpp::unwindProtect(safe_bool, (void*)&found_outliers);
1176
1446
 
1177
- forget_row_outputs(*model_outputs);
1178
- outp["ptr_model"] = Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
1447
+ outp["ptr_model"] = Rcpp::unwindProtect(get_altrepped_pointer, &model_outputs);
1179
1448
  return outp;
1180
1449
  }
1181
1450
 
1182
- // [[Rcpp::export]]
1451
+ // [[Rcpp::export(rng = false)]]
1183
1452
  Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1184
1453
  Rcpp::NumericVector arr_num, Rcpp::IntegerVector arr_cat, Rcpp::IntegerVector arr_ord,
1185
1454
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1190,36 +1459,59 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1190
1459
  Rcpp::NumericVector min_date,
1191
1460
  Rcpp::NumericVector min_ts)
1192
1461
  {
1193
- std::vector<double> Xcpp;
1194
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1462
+ Rcpp::NumericVector Xcpp;
1463
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1195
1464
 
1196
1465
  ModelOutputs *model_outputs = static_cast<ModelOutputs*>(R_ExternalPtrAddr(ptr_model));
1197
- bool found_outliers = find_new_outliers(&arr_num[0], &arr_cat[0], &arr_ord[0],
1466
+ bool found_outliers = find_new_outliers(arr_num_C, INTEGER(arr_cat), INTEGER(arr_ord),
1198
1467
  nrows, nthreads, *model_outputs);
1199
- Rcpp::List outp = describe_outliers(*model_outputs,
1200
- arr_num_C,
1201
- &arr_cat[0],
1202
- &arr_ord[0],
1203
- cat_levels,
1204
- ord_levels,
1205
- colnames_num,
1206
- colnames_cat,
1207
- colnames_ord,
1208
- min_date,
1209
- min_ts);
1210
- outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1468
+ args_describe_outliers temp = {
1469
+ model_outputs,
1470
+ arr_num_C,
1471
+ INTEGER(arr_cat),
1472
+ INTEGER(arr_ord),
1473
+ &cat_levels,
1474
+ &ord_levels,
1475
+ &colnames_num,
1476
+ &colnames_cat,
1477
+ &colnames_ord,
1478
+ &min_date,
1479
+ &min_ts
1480
+ };
1481
+
1482
+ Rcpp::List outp;
1483
+ try {
1484
+ outp = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1485
+ } catch(...) {
1486
+ forget_row_outputs(*model_outputs);
1487
+ throw;
1488
+ }
1211
1489
  forget_row_outputs(*model_outputs);
1490
+ outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1212
1491
  return outp;
1213
1492
  }
1214
1493
 
1215
- // [[Rcpp::export]]
1494
+ // [[Rcpp::export(rng = false)]]
1216
1495
  Rcpp::LogicalVector check_few_values(Rcpp::NumericVector arr_num, size_t nrows, size_t ncols, int nthreads)
1217
1496
  {
1218
- std::vector<char> too_few_vals(ncols, 0);
1219
- check_more_two_values(&arr_num[0], nrows, ncols, nthreads, too_few_vals.data());
1220
1497
  Rcpp::LogicalVector outp(ncols);
1498
+ std::vector<char> too_few_vals(ncols, 0);
1499
+ check_more_two_values(REAL(arr_num), nrows, ncols, nthreads, too_few_vals.data());
1221
1500
  for (size_t col = 0; col < ncols; col++) {
1222
1501
  outp[col] = (bool) too_few_vals[col];
1223
1502
  }
1224
1503
  return outp;
1225
1504
  }
1505
+
1506
+
1507
+ // [[Rcpp::export(rng = false)]]
1508
+ bool R_has_openmp()
1509
+ {
1510
+ #ifdef _OPENMP
1511
+ return true;
1512
+ #else
1513
+ return false;
1514
+ #endif
1515
+ }
1516
+
1517
+ #endif /* _FOR_R */
@@ -74,7 +74,7 @@
74
74
  */
75
75
  void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
76
76
  long double perc_threshold[], size_t buffer_ix[], long double buffer_perc[],
77
- double z_norm, char is_outlier[], bool *found_outliers, bool *new_is_outlier,
77
+ double z_norm, signed char is_outlier[], bool *found_outliers, bool *new_is_outlier,
78
78
  double *next_most_comm)
79
79
  {
80
80
  //TODO: must also establish bounds for new, unseen categories
@@ -90,7 +90,7 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
90
90
  size_t size_tail = 0;
91
91
 
92
92
  /* reset the temporary arrays and fill them */
93
- memset(is_outlier, 0, ncateg * sizeof(char));
93
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
94
94
  for (size_t cat = 0; cat < ncateg; cat++) {
95
95
  buffer_ix[cat] = cat;
96
96
  buffer_perc[cat] = (categ_counts[cat] > 0)? ((long double)categ_counts[cat] / tot_dbl) : 0;
@@ -225,13 +225,13 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
225
225
  * Category to which the majority of the observations belong.
226
226
  */
227
227
  void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
228
- long double prior_prob[], double z_outlier, char is_outlier[],
228
+ long double prior_prob[], double z_outlier, signed char is_outlier[],
229
229
  bool *found_outliers, bool *new_is_outlier, int *categ_maj)
230
230
  {
231
231
  /* initialize parameters as needed */
232
232
  *found_outliers = false;
233
233
  *new_is_outlier = false;
234
- memset(is_outlier, 0, ncateg * sizeof(char));
234
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
235
235
  size_t max_outliers = (size_t) calculate_max_outliers((long double)tot, max_perc_outliers);
236
236
  long double tot_dbl = (long double) (tot + 1);
237
237
  size_t n_non_maj;
@@ -283,7 +283,7 @@ void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t
283
283
  * Proportion of the least common non-outlier category.
284
284
  */
285
285
  bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_t tot,
286
- char is_outlier[], double *next_most_comm)
286
+ signed char is_outlier[], double *next_most_comm)
287
287
  {
288
288
  /* if sample is too small, don't flag any as outliers */
289
289
  if (tot < 1000) return false;
@@ -296,7 +296,7 @@ bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_
296
296
 
297
297
  /* look if there's any category meeting the first condition and none meeting the second one */
298
298
  bool has_outlier_cat = false;
299
- memset(is_outlier, 0, sizeof(char) * ncateg);
299
+ memset(is_outlier, 0, sizeof(signed char) * ncateg);
300
300
  for (size_t cat = 0; cat < ncateg; cat++) {
301
301
  if (categ_counts[cat] > max_outliers && categ_counts[cat] < max_next_most_comm) {
302
302
  has_outlier_cat = false;