outliertree 0.2.1 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,8 @@
1
+ #ifdef _FOR_R
2
+
1
3
  #include <Rcpp.h>
4
+ #include <Rcpp/unwindProtect.h>
5
+ #include <R_ext/Altrep.h>
2
6
  // [[Rcpp::plugins(cpp11)]]
3
7
 
4
8
  /* This is to serialize the model objects */
@@ -7,15 +11,31 @@
7
11
  #include <cereal/types/vector.hpp>
8
12
  #include <sstream>
9
13
  #include <string>
14
+ #include <limits>
10
15
 
11
16
  /* This is the package's header */
12
17
  #include "outlier_tree.hpp"
13
18
 
19
+ void delete_model_from_R_ptr(SEXP R_ptr)
20
+ {
21
+ ModelOutputs *model = static_cast<ModelOutputs*>(R_ExternalPtrAddr(R_ptr));
22
+ delete model;
23
+ R_SetExternalPtrAddr(R_ptr, nullptr);
24
+ R_ClearExternalPtr(R_ptr);
25
+ }
26
+
27
+ SEXP alloc_RawVec(void *data)
28
+ {
29
+ size_t vec_size = *(size_t*)data;
30
+ if (vec_size > (size_t)std::numeric_limits<R_xlen_t>::max())
31
+ Rcpp::stop("Resulting model is too large for R to handle.");
32
+ return Rf_allocVector(RAWSXP, vec_size);
33
+ }
34
+
14
35
  /* for model serialization and re-usage in R */
15
36
  /* https://stackoverflow.com/questions/18474292/how-to-handle-c-internal-data-structure-in-r-in-order-to-allow-save-load */
16
37
  /* this extra comment below the link is a workaround for Rcpp issue 675 in GitHub, do not remove it */
17
- #include <Rinternals.h>
18
- Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
38
+ SEXP serialize_OutlierTree(ModelOutputs *model_outputs)
19
39
  {
20
40
  std::stringstream ss;
21
41
  {
@@ -23,30 +43,157 @@ Rcpp::RawVector serialize_OutlierTree(ModelOutputs *model_outputs)
23
43
  oarchive(*model_outputs);
24
44
  }
25
45
  ss.seekg(0, ss.end);
26
- Rcpp::RawVector retval(ss.tellg());
46
+ std::stringstream::pos_type vec_size = ss.tellg();
47
+ if (vec_size <= 0) {
48
+ Rf_error("Error: model is too big to serialize, resulting object will not be usable.\n");
49
+ }
50
+ size_t vec_size_ = (size_t)vec_size;
51
+ SEXP retval = PROTECT(Rcpp::unwindProtect(alloc_RawVec, (void*)&vec_size_));
27
52
  ss.seekg(0, ss.beg);
28
- ss.read(reinterpret_cast<char*>(&retval[0]), retval.size());
53
+ ss.read(reinterpret_cast<char*>(RAW(retval)), vec_size_);
54
+ UNPROTECT(1);
29
55
  return retval;
30
56
  }
31
57
 
32
- // [[Rcpp::export]]
33
- SEXP deserialize_OutlierTree(Rcpp::RawVector src)
58
+ SEXP deserialize_OutlierTree(SEXP src, SEXP ptr_obj)
34
59
  {
35
60
  std::stringstream ss;
36
- ss.write(reinterpret_cast<char*>(&src[0]), src.size());
61
+ ss.write(reinterpret_cast<char*>(RAW(src)), Rf_xlength(src));
37
62
  ss.seekg(0, ss.beg);
38
63
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
39
64
  {
40
65
  cereal::BinaryInputArchive iarchive(ss);
41
66
  iarchive(*model_outputs);
42
67
  }
43
- return Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
68
+ R_SetExternalPtrAddr(ptr_obj, model_outputs.get());
69
+ R_RegisterCFinalizerEx(ptr_obj, delete_model_from_R_ptr, TRUE);
70
+ model_outputs.release();
71
+ return R_NilValue;
72
+ }
73
+
74
+ static R_altrep_class_t otree_altrepped_pointer_class;
75
+
76
+ R_xlen_t altrepped_pointer_length(SEXP obj)
77
+ {
78
+ return 1;
79
+ }
80
+
81
+ SEXP get_element_from_altrepped_ptr(SEXP R_altrepped_obj, R_xlen_t idx)
82
+ {
83
+ return R_altrep_data1(R_altrepped_obj);
84
+ }
85
+
86
+ Rboolean inspect_altrepped_pointer(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int))
87
+ {
88
+ Rprintf("Altrepped pointer [address:%p]\n", R_ExternalPtrAddr(R_altrep_data1(x)));
89
+ return TRUE;
90
+ }
91
+
92
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
93
+ {
94
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
95
+ SEXP R_ptr_class = PROTECT(Rf_mkString("otree_altrepped_handle"));
96
+ SEXP out = PROTECT(R_new_altrep(otree_altrepped_pointer_class, R_NilValue, R_NilValue));
97
+
98
+ if (!deep) {
99
+ R_set_altrep_data1(out, R_altrep_data1(altrepped_obj));
100
+ }
101
+
102
+ else {
103
+
104
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
105
+
106
+ try {
107
+ std::unique_ptr<ModelOutputs> new_obj(new ModelOutputs());
108
+ ModelOutputs *cpp_ptr = (ModelOutputs*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
109
+ *new_obj = *cpp_ptr;
110
+
111
+ R_SetExternalPtrAddr(R_ptr, new_obj.get());
112
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr, TRUE);
113
+ new_obj.release();
114
+ }
115
+
116
+ catch (const std::exception &ex) {
117
+ Rf_error("%s\n", ex.what());
118
+ }
119
+
120
+ R_set_altrep_data1(out, R_ptr);
121
+ UNPROTECT(1);
122
+ }
123
+
124
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
125
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_class);
126
+ UNPROTECT(3);
127
+ return out;
44
128
  }
45
129
 
46
- // [[Rcpp::export]]
47
- Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
130
+ SEXP serialize_altrepped_pointer(SEXP altrepped_obj)
48
131
  {
49
- return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
132
+ return serialize_OutlierTree((ModelOutputs*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj)));
133
+ }
134
+
135
+ SEXP deserialize_altrepped_pointer(SEXP cls, SEXP R_state)
136
+ {
137
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
138
+ SEXP R_ptr_class = PROTECT(Rf_mkString("otree_altrepped_handle"));
139
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
140
+ SEXP out = PROTECT(R_new_altrep(otree_altrepped_pointer_class, R_NilValue, R_NilValue));
141
+
142
+ try {
143
+ deserialize_OutlierTree(R_state, R_ptr);
144
+ }
145
+ catch (const std::exception &ex) {
146
+ Rf_error("%s\n", ex.what());
147
+ }
148
+
149
+ R_set_altrep_data1(out, R_ptr);
150
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
151
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
152
+
153
+ UNPROTECT(4);
154
+ return out;
155
+ }
156
+
157
+ SEXP get_altrepped_pointer(void *void_ptr)
158
+ {
159
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
160
+ SEXP R_ptr_class = PROTECT(Rf_mkString("otree_altrepped_handle"));
161
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
162
+ SEXP out = PROTECT(R_new_altrep(otree_altrepped_pointer_class, R_NilValue, R_NilValue));
163
+
164
+ std::unique_ptr<ModelOutputs> *ptr = (std::unique_ptr<ModelOutputs>*)void_ptr;
165
+ R_SetExternalPtrAddr(R_ptr, ptr->get());
166
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr, TRUE);
167
+ ptr->release();
168
+
169
+ R_set_altrep_data1(out, R_ptr);
170
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
171
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
172
+
173
+ UNPROTECT(4);
174
+ return out;
175
+ }
176
+
177
+ // [[Rcpp::init]]
178
+ void init_altrepped_class(DllInfo* dll)
179
+ {
180
+ otree_altrepped_pointer_class = R_make_altlist_class("otree_altrepped_pointer_class", "outliertree", dll);
181
+ R_set_altrep_Length_method(otree_altrepped_pointer_class, altrepped_pointer_length);
182
+ R_set_altrep_Inspect_method(otree_altrepped_pointer_class, inspect_altrepped_pointer);
183
+ R_set_altrep_Serialized_state_method(otree_altrepped_pointer_class, serialize_altrepped_pointer);
184
+ R_set_altrep_Unserialize_method(otree_altrepped_pointer_class, deserialize_altrepped_pointer);
185
+ R_set_altrep_Duplicate_method(otree_altrepped_pointer_class, duplicate_altrepped_pointer);
186
+ R_set_altlist_Elt_method(otree_altrepped_pointer_class, get_element_from_altrepped_ptr);
187
+ }
188
+
189
+ SEXP safe_int(void *x)
190
+ {
191
+ return Rf_ScalarInteger(*(int*)x);
192
+ }
193
+
194
+ SEXP safe_bool(void *x)
195
+ {
196
+ return Rf_ScalarLogical(*(bool*)x);
50
197
  }
51
198
 
52
199
  double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_t n, int nthreads)
@@ -59,12 +206,22 @@ double* set_R_nan_as_C_nan(double *restrict x_R, std::vector<double> &x_C, size_
59
206
  return x_C.data();
60
207
  }
61
208
 
209
+ double* set_R_nan_as_C_nan(double *restrict x_R, Rcpp::NumericVector &x_C, size_t n, int nthreads)
210
+ {
211
+ x_C = Rcpp::NumericVector(x_R, x_R + n);
212
+ #pragma omp parallel for schedule(static) num_threads(nthreads) shared(x_R, x_C, n)
213
+ for (size_t_for i = 0; i < n; i++)
214
+ if (isnan(x_R[i]) || Rcpp::NumericVector::is_na(x_R[i]) || Rcpp::traits::is_nan<REALSXP>(x_R[i]))
215
+ x_C[i] = NAN;
216
+ return REAL(x_C);
217
+ }
218
+
62
219
 
63
220
  /* for predicting outliers */
64
221
  Rcpp::List describe_outliers(ModelOutputs &model_outputs,
65
- double *arr_num,
66
- int *arr_cat,
67
- int *arr_ord,
222
+ double *restrict arr_num,
223
+ int *restrict arr_cat,
224
+ int *restrict arr_ord,
68
225
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
69
226
  Rcpp::ListOf<Rcpp::StringVector> ord_levels,
70
227
  Rcpp::StringVector colnames_num,
@@ -209,7 +366,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
209
366
  } else if (outl_col < (ncols_num + ncols_cat)) {
210
367
  if (outl_col < (ncols_num + ncols_cat_cat)) {
211
368
  tmp_bool = Rcpp::LogicalVector(model_outputs.all_clusters[outl_col][outl_clust].subset_common.size(), false);
212
- for (size_t cat = 0; cat < tmp_bool.size(); cat++) {
369
+ for (size_t cat = 0; cat < (size_t)tmp_bool.size(); cat++) {
213
370
  if (model_outputs.all_clusters[outl_col][outl_clust].subset_common[cat] == 0) {
214
371
  tmp_bool[cat] = true;
215
372
  }
@@ -253,7 +410,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
253
410
  }
254
411
  } else {
255
412
  tmp_bool = Rcpp::LogicalVector(model_outputs.all_clusters[outl_col][outl_clust].subset_common.size(), false);
256
- for (size_t cat = 0; cat < tmp_bool.size(); cat++) {
413
+ for (size_t cat = 0; cat < (size_t)tmp_bool.size(); cat++) {
257
414
  if (model_outputs.all_clusters[outl_col][outl_clust].subset_common[cat] == 0) {
258
415
  tmp_bool[cat] = true;
259
416
  }
@@ -345,6 +502,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
345
502
  }
346
503
  break;
347
504
  }
505
+
506
+ default:
507
+ {
508
+ assert(0);
509
+ break;
510
+ }
348
511
  }
349
512
 
350
513
  /* add the comparison point */
@@ -377,6 +540,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
377
540
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
378
541
  break;
379
542
  }
543
+
544
+ default:
545
+ {
546
+ unexpected_error();
547
+ }
380
548
  }
381
549
  break;
382
550
  }
@@ -492,6 +660,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
492
660
  }
493
661
  break;
494
662
  }
663
+
664
+ default:
665
+ {
666
+ assert(0);
667
+ break;
668
+ }
495
669
 
496
670
  }
497
671
  lst_cond[row] = Rcpp::List::create(Rcpp::clone(cond_clust));
@@ -528,6 +702,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
528
702
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][curr_tree].col_num]);
529
703
  break;
530
704
  }
705
+
706
+ default:
707
+ {
708
+ assert(0);
709
+ break;
710
+ }
531
711
  }
532
712
 
533
713
  /* add conditions from tree */
@@ -599,6 +779,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
599
779
  break;
600
780
  }
601
781
 
782
+ default: {}
602
783
  }
603
784
  break;
604
785
  }
@@ -696,6 +877,7 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
696
877
  break;
697
878
  }
698
879
 
880
+ default: {}
699
881
  }
700
882
  break;
701
883
  }
@@ -758,10 +940,16 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
758
940
  break;
759
941
  }
760
942
 
943
+ default: {}
761
944
  }
762
945
  break;
763
946
  }
764
947
 
948
+ default:
949
+ {
950
+ assert(0);
951
+ break;
952
+ }
765
953
  }
766
954
  }
767
955
 
@@ -796,6 +984,12 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
796
984
  cond_clust["column"] = Rcpp::as<Rcpp::CharacterVector>(colnames_ord[model_outputs.all_trees[outl_col][parent_tree].col_num]);
797
985
  break;
798
986
  }
987
+
988
+ default:
989
+ {
990
+ assert(0);
991
+ break;
992
+ }
799
993
  }
800
994
 
801
995
 
@@ -835,6 +1029,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
835
1029
  cond_clust["value_comp"] = Rcpp::as<Rcpp::CharacterVector>(NA_STRING);
836
1030
  break;
837
1031
  }
1032
+
1033
+ default:
1034
+ {
1035
+ unexpected_error();
1036
+ }
838
1037
  }
839
1038
  break;
840
1039
  }
@@ -1011,6 +1210,11 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1011
1210
  break;
1012
1211
  }
1013
1212
 
1213
+ default:
1214
+ {
1215
+ assert(0);
1216
+ break;
1217
+ }
1014
1218
  }
1015
1219
 
1016
1220
 
@@ -1038,6 +1242,37 @@ Rcpp::List describe_outliers(ModelOutputs &model_outputs,
1038
1242
  return outp;
1039
1243
  }
1040
1244
 
1245
+ struct args_describe_outliers {
1246
+ ModelOutputs *model_outputs;
1247
+ double *arr_num;
1248
+ int *arr_cat;
1249
+ int *arr_ord;
1250
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1251
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1252
+ Rcpp::StringVector *colnames_num;
1253
+ Rcpp::StringVector *colnames_cat;
1254
+ Rcpp::StringVector *colnames_ord;
1255
+ Rcpp::NumericVector *min_date;
1256
+ Rcpp::NumericVector *min_ts;
1257
+ };
1258
+
1259
+ SEXP describe_outliers_wrapper(void *args_)
1260
+ {
1261
+ args_describe_outliers *args = (args_describe_outliers*)args_;
1262
+ return describe_outliers(*(args->model_outputs),
1263
+ args->arr_num,
1264
+ args->arr_cat,
1265
+ args->arr_ord,
1266
+ *(args->cat_levels),
1267
+ *(args->ord_levels),
1268
+ *(args->colnames_num),
1269
+ *(args->colnames_cat),
1270
+ *(args->colnames_ord),
1271
+ *(args->min_date),
1272
+ *(args->min_ts));
1273
+ }
1274
+
1275
+
1041
1276
  /* for extracting info about flaggable outliers */
1042
1277
  Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1043
1278
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1102,9 +1337,27 @@ Rcpp::List extract_outl_bounds(ModelOutputs &model_outputs,
1102
1337
  return outp;
1103
1338
  }
1104
1339
 
1340
+ struct args_extract_outl_bounds {
1341
+ ModelOutputs *model_outputs;
1342
+ Rcpp::ListOf<Rcpp::StringVector> *cat_levels;
1343
+ Rcpp::ListOf<Rcpp::StringVector> *ord_levels;
1344
+ Rcpp::NumericVector *min_date;
1345
+ Rcpp::NumericVector *min_ts;
1346
+ };
1347
+
1348
+ SEXP extract_outl_bounds_wrapper(void *args_)
1349
+ {
1350
+ args_extract_outl_bounds *args = (args_extract_outl_bounds*)args_;
1351
+ return extract_outl_bounds(*(args->model_outputs),
1352
+ *(args->cat_levels),
1353
+ *(args->ord_levels),
1354
+ *(args->min_date),
1355
+ *(args->min_ts));
1356
+ }
1357
+
1105
1358
 
1106
1359
  /* external functions for fitting the model and predicting outliers */
1107
- // [[Rcpp::export]]
1360
+ // [[Rcpp::export(rng = false)]]
1108
1361
  Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1109
1362
  Rcpp::IntegerVector arr_cat, size_t ncols_categ, Rcpp::IntegerVector ncat,
1110
1363
  Rcpp::IntegerVector arr_ord, size_t ncols_ord, Rcpp::IntegerVector ncat_ord,
@@ -1121,8 +1374,16 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1121
1374
  Rcpp::NumericVector min_date,
1122
1375
  Rcpp::NumericVector min_ts)
1123
1376
  {
1377
+ Rcpp::List outp = Rcpp::List::create(
1378
+ Rcpp::_["ptr_model"] = R_NilValue,
1379
+ Rcpp::_["bounds"] = R_NilValue,
1380
+ Rcpp::_["outliers_info"] = R_NilValue,
1381
+ Rcpp::_["ntrees"] = R_NilValue,
1382
+ Rcpp::_["nclust"] = R_NilValue,
1383
+ Rcpp::_["found_outliers"] = R_NilValue
1384
+ );
1385
+
1124
1386
  bool found_outliers;
1125
- Rcpp::List outp;
1126
1387
  size_t tot_cols = ncols_numeric + ncols_categ + ncols_ord;
1127
1388
  std::vector<char> cols_ignore;
1128
1389
  char *cols_ignore_ptr = NULL;
@@ -1132,54 +1393,62 @@ Rcpp::List fit_OutlierTree(Rcpp::NumericVector arr_num, size_t ncols_numeric,
1132
1393
  cols_ignore_ptr = &cols_ignore[0];
1133
1394
  }
1134
1395
  std::vector<double> Xcpp;
1135
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1396
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1136
1397
 
1137
1398
  std::unique_ptr<ModelOutputs> model_outputs = std::unique_ptr<ModelOutputs>(new ModelOutputs());
1138
1399
  found_outliers = fit_outliers_models(*model_outputs,
1139
1400
  arr_num_C, ncols_numeric,
1140
- &arr_cat[0], ncols_categ, &ncat[0],
1141
- &arr_ord[0], ncols_ord, &ncat_ord[0],
1401
+ INTEGER(arr_cat), ncols_categ, INTEGER(ncat),
1402
+ INTEGER(arr_ord), ncols_ord, INTEGER(ncat_ord),
1142
1403
  nrows, cols_ignore_ptr, nthreads,
1143
1404
  categ_as_bin, ord_as_bin, cat_bruteforce_subset, categ_from_maj, take_mid,
1144
1405
  max_depth, max_perc_outliers, min_size_numeric, min_size_categ,
1145
1406
  min_gain, gain_as_pct, follow_all, z_norm, z_outlier);
1146
1407
 
1147
- outp["bounds"] = extract_outl_bounds(*model_outputs,
1148
- cat_levels,
1149
- ord_levels,
1150
- min_date,
1151
- min_ts);
1408
+ args_extract_outl_bounds temp = {
1409
+ model_outputs.get(),
1410
+ &cat_levels,
1411
+ &ord_levels,
1412
+ &min_date,
1413
+ &min_ts
1414
+ };
1415
+ outp["bounds"] = Rcpp::unwindProtect(extract_outl_bounds_wrapper, (void*)&temp);
1152
1416
 
1153
- outp["serialized_obj"] = serialize_OutlierTree(model_outputs.get());
1154
1417
  if (return_outliers) {
1155
- outp["outliers_info"] = describe_outliers(*model_outputs,
1156
- arr_num_C,
1157
- &arr_cat[0],
1158
- &arr_ord[0],
1159
- cat_levels,
1160
- ord_levels,
1161
- colnames_num,
1162
- colnames_cat,
1163
- colnames_ord,
1164
- min_date,
1165
- min_ts);
1418
+ args_describe_outliers temp = {
1419
+ model_outputs.get(),
1420
+ arr_num_C,
1421
+ INTEGER(arr_cat),
1422
+ INTEGER(arr_ord),
1423
+ &cat_levels,
1424
+ &ord_levels,
1425
+ &colnames_num,
1426
+ &colnames_cat,
1427
+ &colnames_ord,
1428
+ &min_date,
1429
+ &min_ts
1430
+ };
1431
+ outp["outliers_info"] = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1166
1432
  }
1433
+ forget_row_outputs(*model_outputs);
1434
+
1167
1435
  /* add number of trees and clusters */
1168
1436
  size_t ntrees = 0, nclust = 0;
1169
1437
  for (size_t col = 0; col < model_outputs->all_trees.size(); col++) {
1170
1438
  ntrees += model_outputs->all_trees[col].size();
1171
1439
  nclust += model_outputs->all_clusters[col].size();
1172
1440
  }
1173
- outp["ntrees"] = Rcpp::wrap((int) ntrees);
1174
- outp["nclust"] = Rcpp::wrap((int) nclust);
1175
- outp["found_outliers"] = Rcpp::wrap(found_outliers);
1441
+ int ntrees_int = (int)ntrees;
1442
+ int nclust_int = (int)nclust;
1443
+ outp["ntrees"] = Rcpp::unwindProtect(safe_int, (void*)&ntrees_int);
1444
+ outp["nclust"] = Rcpp::unwindProtect(safe_int, (void*)&nclust_int);
1445
+ outp["found_outliers"] = Rcpp::unwindProtect(safe_bool, (void*)&found_outliers);
1176
1446
 
1177
- forget_row_outputs(*model_outputs);
1178
- outp["ptr_model"] = Rcpp::XPtr<ModelOutputs>(model_outputs.release(), true);
1447
+ outp["ptr_model"] = Rcpp::unwindProtect(get_altrepped_pointer, &model_outputs);
1179
1448
  return outp;
1180
1449
  }
1181
1450
 
1182
- // [[Rcpp::export]]
1451
+ // [[Rcpp::export(rng = false)]]
1183
1452
  Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1184
1453
  Rcpp::NumericVector arr_num, Rcpp::IntegerVector arr_cat, Rcpp::IntegerVector arr_ord,
1185
1454
  Rcpp::ListOf<Rcpp::StringVector> cat_levels,
@@ -1190,36 +1459,59 @@ Rcpp::List predict_OutlierTree(SEXP ptr_model, size_t nrows, int nthreads,
1190
1459
  Rcpp::NumericVector min_date,
1191
1460
  Rcpp::NumericVector min_ts)
1192
1461
  {
1193
- std::vector<double> Xcpp;
1194
- double *arr_num_C = set_R_nan_as_C_nan(&arr_num[0], Xcpp, arr_num.size(), nthreads);
1462
+ Rcpp::NumericVector Xcpp;
1463
+ double *arr_num_C = set_R_nan_as_C_nan(REAL(arr_num), Xcpp, arr_num.size(), nthreads);
1195
1464
 
1196
1465
  ModelOutputs *model_outputs = static_cast<ModelOutputs*>(R_ExternalPtrAddr(ptr_model));
1197
- bool found_outliers = find_new_outliers(&arr_num[0], &arr_cat[0], &arr_ord[0],
1466
+ bool found_outliers = find_new_outliers(arr_num_C, INTEGER(arr_cat), INTEGER(arr_ord),
1198
1467
  nrows, nthreads, *model_outputs);
1199
- Rcpp::List outp = describe_outliers(*model_outputs,
1200
- arr_num_C,
1201
- &arr_cat[0],
1202
- &arr_ord[0],
1203
- cat_levels,
1204
- ord_levels,
1205
- colnames_num,
1206
- colnames_cat,
1207
- colnames_ord,
1208
- min_date,
1209
- min_ts);
1210
- outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1468
+ args_describe_outliers temp = {
1469
+ model_outputs,
1470
+ arr_num_C,
1471
+ INTEGER(arr_cat),
1472
+ INTEGER(arr_ord),
1473
+ &cat_levels,
1474
+ &ord_levels,
1475
+ &colnames_num,
1476
+ &colnames_cat,
1477
+ &colnames_ord,
1478
+ &min_date,
1479
+ &min_ts
1480
+ };
1481
+
1482
+ Rcpp::List outp;
1483
+ try {
1484
+ outp = Rcpp::unwindProtect(describe_outliers_wrapper, (void*)&temp);
1485
+ } catch(...) {
1486
+ forget_row_outputs(*model_outputs);
1487
+ throw;
1488
+ }
1211
1489
  forget_row_outputs(*model_outputs);
1490
+ outp["found_outliers"] = Rcpp::LogicalVector(found_outliers);
1212
1491
  return outp;
1213
1492
  }
1214
1493
 
1215
- // [[Rcpp::export]]
1494
+ // [[Rcpp::export(rng = false)]]
1216
1495
  Rcpp::LogicalVector check_few_values(Rcpp::NumericVector arr_num, size_t nrows, size_t ncols, int nthreads)
1217
1496
  {
1218
- std::vector<char> too_few_vals(ncols, 0);
1219
- check_more_two_values(&arr_num[0], nrows, ncols, nthreads, too_few_vals.data());
1220
1497
  Rcpp::LogicalVector outp(ncols);
1498
+ std::vector<char> too_few_vals(ncols, 0);
1499
+ check_more_two_values(REAL(arr_num), nrows, ncols, nthreads, too_few_vals.data());
1221
1500
  for (size_t col = 0; col < ncols; col++) {
1222
1501
  outp[col] = (bool) too_few_vals[col];
1223
1502
  }
1224
1503
  return outp;
1225
1504
  }
1505
+
1506
+
1507
+ // [[Rcpp::export(rng = false)]]
1508
+ bool R_has_openmp()
1509
+ {
1510
+ #ifdef _OPENMP
1511
+ return true;
1512
+ #else
1513
+ return false;
1514
+ #endif
1515
+ }
1516
+
1517
+ #endif /* _FOR_R */
@@ -74,7 +74,7 @@
74
74
  */
75
75
  void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
76
76
  long double perc_threshold[], size_t buffer_ix[], long double buffer_perc[],
77
- double z_norm, char is_outlier[], bool *found_outliers, bool *new_is_outlier,
77
+ double z_norm, signed char is_outlier[], bool *found_outliers, bool *new_is_outlier,
78
78
  double *next_most_comm)
79
79
  {
80
80
  //TODO: must also establish bounds for new, unseen categories
@@ -90,7 +90,7 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
90
90
  size_t size_tail = 0;
91
91
 
92
92
  /* reset the temporary arrays and fill them */
93
- memset(is_outlier, 0, ncateg * sizeof(char));
93
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
94
94
  for (size_t cat = 0; cat < ncateg; cat++) {
95
95
  buffer_ix[cat] = cat;
96
96
  buffer_perc[cat] = (categ_counts[cat] > 0)? ((long double)categ_counts[cat] / tot_dbl) : 0;
@@ -225,13 +225,13 @@ void find_outlier_categories(size_t categ_counts[], size_t ncateg, size_t tot, d
225
225
  * Category to which the majority of the observations belong.
226
226
  */
227
227
  void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t tot, double max_perc_outliers,
228
- long double prior_prob[], double z_outlier, char is_outlier[],
228
+ long double prior_prob[], double z_outlier, signed char is_outlier[],
229
229
  bool *found_outliers, bool *new_is_outlier, int *categ_maj)
230
230
  {
231
231
  /* initialize parameters as needed */
232
232
  *found_outliers = false;
233
233
  *new_is_outlier = false;
234
- memset(is_outlier, 0, ncateg * sizeof(char));
234
+ memset(is_outlier, 0, ncateg * sizeof(signed char));
235
235
  size_t max_outliers = (size_t) calculate_max_outliers((long double)tot, max_perc_outliers);
236
236
  long double tot_dbl = (long double) (tot + 1);
237
237
  size_t n_non_maj;
@@ -283,7 +283,7 @@ void find_outlier_categories_by_maj(size_t categ_counts[], size_t ncateg, size_t
283
283
  * Proportion of the least common non-outlier category.
284
284
  */
285
285
  bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_t tot,
286
- char is_outlier[], double *next_most_comm)
286
+ signed char is_outlier[], double *next_most_comm)
287
287
  {
288
288
  /* if sample is too small, don't flag any as outliers */
289
289
  if (tot < 1000) return false;
@@ -296,7 +296,7 @@ bool find_outlier_categories_no_cond(size_t categ_counts[], size_t ncateg, size_
296
296
 
297
297
  /* look if there's any category meeting the first condition and none meeting the second one */
298
298
  bool has_outlier_cat = false;
299
- memset(is_outlier, 0, sizeof(char) * ncateg);
299
+ memset(is_outlier, 0, sizeof(signed char) * ncateg);
300
300
  for (size_t cat = 0; cat < ncateg; cat++) {
301
301
  if (categ_counts[cat] > max_outliers && categ_counts[cat] < max_next_most_comm) {
302
302
  has_outlier_cat = false;