isotree 0.2.2 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (152) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2116 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +132 -0
  14. data/vendor/isotree/src/RcppExports.cpp +594 -57
  15. data/vendor/isotree/src/Rwrapper.cpp +2452 -304
  16. data/vendor/isotree/src/c_interface.cpp +958 -0
  17. data/vendor/isotree/src/crit.hpp +4236 -0
  18. data/vendor/isotree/src/digamma.hpp +184 -0
  19. data/vendor/isotree/src/dist.hpp +1886 -0
  20. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  21. data/vendor/isotree/src/extended.hpp +1444 -0
  22. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  23. data/vendor/isotree/src/fit_model.hpp +2401 -0
  24. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  25. data/vendor/isotree/src/helpers_iforest.hpp +814 -0
  26. data/vendor/isotree/src/{impute.cpp → impute.hpp} +382 -123
  27. data/vendor/isotree/src/indexer.cpp +515 -0
  28. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  29. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  30. data/vendor/isotree/src/isoforest.hpp +1659 -0
  31. data/vendor/isotree/src/isotree.hpp +1815 -394
  32. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  33. data/vendor/isotree/src/merge_models.cpp +159 -16
  34. data/vendor/isotree/src/mult.hpp +1321 -0
  35. data/vendor/isotree/src/oop_interface.cpp +844 -0
  36. data/vendor/isotree/src/oop_interface.hpp +278 -0
  37. data/vendor/isotree/src/other_helpers.hpp +219 -0
  38. data/vendor/isotree/src/predict.hpp +1932 -0
  39. data/vendor/isotree/src/python_helpers.hpp +114 -0
  40. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  41. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  42. data/vendor/isotree/src/robinmap/README.md +483 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1639 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  46. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  47. data/vendor/isotree/src/serialize.cpp +4316 -139
  48. data/vendor/isotree/src/sql.cpp +143 -61
  49. data/vendor/isotree/src/subset_models.cpp +174 -0
  50. data/vendor/isotree/src/utils.hpp +3786 -0
  51. data/vendor/isotree/src/xoshiro.hpp +463 -0
  52. data/vendor/isotree/src/ziggurat.hpp +405 -0
  53. metadata +40 -105
  54. data/vendor/cereal/LICENSE +0 -24
  55. data/vendor/cereal/README.md +0 -85
  56. data/vendor/cereal/include/cereal/access.hpp +0 -351
  57. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  58. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  59. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  60. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  61. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  62. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  63. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  65. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  66. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  67. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  68. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  69. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  70. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  71. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  72. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  74. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  76. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  77. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  78. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  79. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  91. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  92. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  94. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  96. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  97. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  98. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  99. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  100. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  101. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  102. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  103. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  104. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  105. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  106. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  107. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  111. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  112. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  113. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  114. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  115. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  116. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  117. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  118. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  119. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  120. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  121. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  122. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  123. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  124. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  125. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  126. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  127. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  128. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  129. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  130. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  131. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  132. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  133. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  134. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  135. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  136. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  137. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  138. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  139. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  140. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  141. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  142. data/vendor/cereal/include/cereal/version.hpp +0 -52
  143. data/vendor/isotree/src/Makevars +0 -4
  144. data/vendor/isotree/src/crit.cpp +0 -912
  145. data/vendor/isotree/src/dist.cpp +0 -749
  146. data/vendor/isotree/src/extended.cpp +0 -790
  147. data/vendor/isotree/src/fit_model.cpp +0 -1090
  148. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  149. data/vendor/isotree/src/isoforest.cpp +0 -771
  150. data/vendor/isotree/src/mult.cpp +0 -607
  151. data/vendor/isotree/src/predict.cpp +0 -853
  152. data/vendor/isotree/src/utils.cpp +0 -1566
@@ -18,11 +18,29 @@
18
18
  * [5] https://sourceforge.net/projects/iforest/
19
19
  * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
20
20
  * [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
21
- * [8] Cortes, David. "Distance approximation using Isolation Forests." arXiv preprint arXiv:1910.12362 (2019).
22
- * [9] Cortes, David. "Imputing missing values with unsupervised random trees." arXiv preprint arXiv:1911.06646 (2019).
21
+ * [8] Cortes, David.
22
+ * "Distance approximation using Isolation Forests."
23
+ * arXiv preprint arXiv:1910.12362 (2019).
24
+ * [9] Cortes, David.
25
+ * "Imputing missing values with unsupervised random trees."
26
+ * arXiv preprint arXiv:1911.06646 (2019).
27
+ * [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
28
+ * [11] Cortes, David.
29
+ * "Revisiting randomized choices in isolation forests."
30
+ * arXiv preprint arXiv:2110.13402 (2021).
31
+ * [12] Guha, Sudipto, et al.
32
+ * "Robust random cut forest based anomaly detection on streams."
33
+ * International conference on machine learning. PMLR, 2016.
34
+ * [13] Cortes, David.
35
+ * "Isolation forests: looking beyond tree depth."
36
+ * arXiv preprint arXiv:2111.11639 (2021).
37
+ * [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
38
+ * "Isolation kernel and its effect on SVM"
39
+ * Proceedings of the 24th ACM SIGKDD
40
+ * International Conference on Knowledge Discovery & Data Mining. 2018.
23
41
  *
24
42
  * BSD 2-Clause License
25
- * Copyright (c) 2020, David Cortes
43
+ * Copyright (c) 2019-2023, David Cortes
26
44
  * All rights reserved.
27
45
  * Redistribution and use in source and binary forms, with or without
28
46
  * modification, are permitted provided that the following conditions are met:
@@ -45,157 +63,500 @@
45
63
  #ifdef _FOR_R
46
64
 
47
65
  #include <Rcpp.h>
48
- // [[Rcpp::plugins(cpp11)]]
66
+ #include <Rcpp/unwindProtect.h>
67
+ #include <Rinternals.h>
68
+ #include <R_ext/Altrep.h>
49
69
 
50
- /* This is to serialize the model objects */
51
- // [[Rcpp::depends(Rcereal)]]
52
- #include <cereal/archives/binary.hpp>
53
- #include <cereal/types/vector.hpp>
54
- #include <sstream>
55
- #include <string>
70
+ #include <type_traits>
56
71
 
57
72
  /* This is the package's header */
58
73
  #include "isotree.hpp"
59
74
 
75
+ /* Library is templated, base R comes with only these 2 types though */
76
+ #include "headers_joined.hpp"
77
+ #define real_t double
78
+ #define sparse_ix int
79
+ #include "instantiate_template_headers.hpp"
80
+
81
+ /* For imputing CSR matrices with differing columns from input */
82
+ #include "other_helpers.hpp"
83
+
84
+ /* Note: the R version calls the 'sort_csc_indices' templated function,
85
+ so it's not enough to just include 'isotree_exportable.hpp' and let
86
+ the templates be instantiated elsewhere. */
87
+
88
+ #define throw_mem_err() throw Rcpp::exception("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n")
89
+
90
+ SEXP alloc_RawVec(void *data)
91
+ {
92
+ size_t vecsize = *(size_t*)data;
93
+ if (unlikely(vecsize > (size_t)std::numeric_limits<R_xlen_t>::max()))
94
+ Rcpp::stop("Object is too big for R to handle.");
95
+ return Rcpp::RawVector((R_xlen_t)vecsize);
96
+ }
97
+
98
+ SEXP safe_copy_vec(void *data)
99
+ {
100
+ std::vector<double> *vec = (std::vector<double>*)data;
101
+ return Rcpp::NumericVector(vec->begin(), vec->end());
102
+ }
103
+
104
+ SEXP safe_copy_intvec(void *data)
105
+ {
106
+ std::vector<int> *vec = (std::vector<int>*)data;
107
+ return Rcpp::IntegerVector(vec->begin(), vec->end());
108
+ }
109
+
110
+ SEXP safe_int_matrix(void *dims)
111
+ {
112
+ size_t *dims_ = (size_t*)dims;
113
+ size_t nrows = dims_[0];
114
+ size_t ncols = dims_[1];
115
+ return Rcpp::IntegerMatrix(nrows, ncols);
116
+ }
117
+
118
+ template <class Model>
119
+ SEXP safe_XPtr(void *model_ptr)
120
+ {
121
+ return Rcpp::XPtr<Model>((Model*)model_ptr, true);
122
+ }
123
+
124
+ SEXP safe_errlist(void *ignored)
125
+ {
126
+ return Rcpp::List::create(Rcpp::_["err"] = Rcpp::LogicalVector::create(1));
127
+ }
128
+
129
+ SEXP safe_FALSE(void *ignored)
130
+ {
131
+ return Rcpp::LogicalVector::create(0);
132
+ }
133
+
134
+ Rcpp::RawVector resize_vec(Rcpp::RawVector inp, size_t new_size)
135
+ {
136
+ Rcpp::RawVector out = Rcpp::unwindProtect(alloc_RawVec, (void*)&new_size);
137
+ memcpy(RAW(out), RAW(inp), std::min((size_t)inp.size(), new_size));
138
+ return out;
139
+ }
140
+
60
141
  /* for model serialization and re-usage in R */
61
142
  /* https://stackoverflow.com/questions/18474292/how-to-handle-c-internal-data-structure-in-r-in-order-to-allow-save-load */
62
143
  /* this extra comment below the link is a workaround for Rcpp issue 675 in GitHub, do not remove it */
63
- #include <Rinternals.h>
64
- template <class T>
65
- Rcpp::RawVector serialize_cpp_obj(T *model_outputs)
144
+ template <class Model>
145
+ Rcpp::RawVector serialize_cpp_obj(const Model *model_outputs)
66
146
  {
67
- std::stringstream ss;
68
- {
69
- cereal::BinaryOutputArchive oarchive(ss); // Create an output archive
70
- oarchive(*model_outputs);
71
- }
72
- ss.seekg(0, ss.end);
73
- /* Checking for potential integer overflows */
74
- std::stringstream::pos_type vec_size = ss.tellg();
75
- if (vec_size <= 0) {
76
- Rcpp::Rcerr << "Error: model is too big to serialize, resulting object will not be usable.\n" << std::endl;
77
- return Rcpp::RawVector();
78
- }
79
- Rcpp::RawVector retval((size_t) vec_size);
80
- ss.seekg(0, ss.beg);
81
- ss.read(reinterpret_cast<char*>(&retval[0]), retval.size());
82
- return retval;
147
+ size_t serialized_size = determine_serialized_size(*model_outputs);
148
+ if (unlikely(!serialized_size))
149
+ throw Rcpp::exception("Unexpected error.");
150
+ if (unlikely(serialized_size > (size_t)std::numeric_limits<R_xlen_t>::max()))
151
+ throw Rcpp::exception("Resulting model is too large for R to handle.");
152
+ Rcpp::RawVector out = Rcpp::unwindProtect(alloc_RawVec, (void*)&serialized_size);
153
+ char *out_ = (char*)RAW(out);
154
+ serialize_isotree(*model_outputs, out_);
155
+ return out;
83
156
  }
84
157
 
85
- template <class T>
158
+ template <class Model>
86
159
  SEXP deserialize_cpp_obj(Rcpp::RawVector src)
87
160
  {
88
- std::stringstream ss;
89
- ss.write(reinterpret_cast<char*>(&src[0]), src.size());
90
- ss.seekg(0, ss.beg);
91
- std::unique_ptr<T> model_outputs = std::unique_ptr<T>(new T());
92
- {
93
- cereal::BinaryInputArchive iarchive(ss);
94
- iarchive(*model_outputs);
95
- }
96
- return Rcpp::XPtr<T>(model_outputs.release(), true);
161
+ if (unlikely(!src.size()))
162
+ Rcpp::stop("Unexpected error.");
163
+ std::unique_ptr<Model> out(new Model());
164
+ const char *inp = (const char*)RAW(src);
165
+ deserialize_isotree(*out, inp);
166
+ SEXP out_ = Rcpp::unwindProtect(safe_XPtr<Model>, out.get());
167
+ out.release();
168
+ return out_;
97
169
  }
98
170
 
99
- // [[Rcpp::export]]
171
+ // [[Rcpp::export(rng = false)]]
100
172
  SEXP deserialize_IsoForest(Rcpp::RawVector src)
101
173
  {
102
174
  return deserialize_cpp_obj<IsoForest>(src);
103
175
  }
104
176
 
105
- // [[Rcpp::export]]
177
+ // [[Rcpp::export(rng = false)]]
106
178
  SEXP deserialize_ExtIsoForest(Rcpp::RawVector src)
107
179
  {
108
180
  return deserialize_cpp_obj<ExtIsoForest>(src);
109
181
  }
110
182
 
111
- // [[Rcpp::export]]
183
+ // [[Rcpp::export(rng = false)]]
112
184
  SEXP deserialize_Imputer(Rcpp::RawVector src)
113
185
  {
114
186
  return deserialize_cpp_obj<Imputer>(src);
115
187
  }
116
188
 
117
- // [[Rcpp::export]]
118
- Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
189
+ // [[Rcpp::export(rng = false)]]
190
+ SEXP deserialize_Indexer(Rcpp::RawVector src)
191
+ {
192
+ return deserialize_cpp_obj<TreesIndexer>(src);
193
+ }
194
+
195
+ // [[Rcpp::export(rng = false)]]
196
+ SEXP serialize_IsoForest_from_ptr(SEXP R_ptr)
197
+ {
198
+ const IsoForest* model = (const IsoForest*)R_ExternalPtrAddr(R_ptr);
199
+ return serialize_cpp_obj<IsoForest>(model);
200
+ }
201
+
202
+ // [[Rcpp::export(rng = false)]]
203
+ SEXP serialize_ExtIsoForest_from_ptr(SEXP R_ptr)
204
+ {
205
+ const ExtIsoForest* model = (const ExtIsoForest*)R_ExternalPtrAddr(R_ptr);
206
+ return serialize_cpp_obj<ExtIsoForest>(model);
207
+ }
208
+
209
+ // [[Rcpp::export(rng = false)]]
210
+ SEXP serialize_Imputer_from_ptr(SEXP R_ptr)
211
+ {
212
+ const Imputer* model = (const Imputer*)R_ExternalPtrAddr(R_ptr);
213
+ return serialize_cpp_obj<Imputer>(model);
214
+ }
215
+
216
+ // [[Rcpp::export(rng = false)]]
217
+ SEXP serialize_Indexer_from_ptr(SEXP R_ptr)
218
+ {
219
+ const TreesIndexer* model = (const TreesIndexer*)R_ExternalPtrAddr(R_ptr);
220
+ return serialize_cpp_obj<TreesIndexer>(model);
221
+ }
222
+
223
+ // [[Rcpp::export(rng = false)]]
224
+ Rcpp::LogicalVector check_null_ptr_model_internal(SEXP ptr_model)
119
225
  {
120
226
  return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
121
227
  }
122
228
 
229
+ static R_altrep_class_t altrepped_pointer_IsoForest;
230
+ static R_altrep_class_t altrepped_pointer_ExtIsoForest;
231
+ static R_altrep_class_t altrepped_pointer_Imputer;
232
+ static R_altrep_class_t altrepped_pointer_TreesIndexer;
233
+ static R_altrep_class_t altrepped_pointer_NullPointer;
234
+
235
+ template <class Model>
236
+ R_altrep_class_t get_altrep_obj_class()
237
+ {
238
+ if (std::is_same<Model, IsoForest>::value) return altrepped_pointer_IsoForest;
239
+
240
+ if (std::is_same<Model, ExtIsoForest>::value) return altrepped_pointer_ExtIsoForest;
241
+
242
+ if (std::is_same<Model, Imputer>::value) return altrepped_pointer_Imputer;
243
+
244
+ if (std::is_same<Model, TreesIndexer>::value) return altrepped_pointer_TreesIndexer;
245
+
246
+ throw Rcpp::exception("Internal error. Please open a bug report.");
247
+ }
248
+
249
+ R_xlen_t altrepped_pointer_length(SEXP obj)
250
+ {
251
+ return 1;
252
+ }
253
+
254
+ SEXP get_element_from_altrepped_obj(SEXP R_altrepped_obj, R_xlen_t idx)
255
+ {
256
+ return R_altrep_data1(R_altrepped_obj);
257
+ }
258
+
259
+ template <class Model>
260
+ void delete_model_from_R_ptr(SEXP R_ptr)
261
+ {
262
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_ptr);
263
+ delete cpp_ptr;
264
+ R_SetExternalPtrAddr(R_ptr, nullptr);
265
+ R_ClearExternalPtr(R_ptr);
266
+ }
267
+
268
+ template <class Model>
269
+ SEXP get_altrepped_pointer(void *void_ptr)
270
+ {
271
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
272
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
273
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
274
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
275
+
276
+ std::unique_ptr<Model> *ptr = (std::unique_ptr<Model>*)void_ptr;
277
+ R_SetExternalPtrAddr(R_ptr, ptr->get());
278
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
279
+ ptr->release();
280
+
281
+ R_set_altrep_data1(out, R_ptr);
282
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
283
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
284
+
285
+ UNPROTECT(4);
286
+ return out;
287
+ }
288
+
289
+ template <class Model>
290
+ SEXP serialize_altrepped_pointer(SEXP altrepped_obj)
291
+ {
292
+ try {
293
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
294
+ R_xlen_t state_size = determine_serialized_size(*cpp_ptr);
295
+ SEXP R_state = PROTECT(Rf_allocVector(RAWSXP, state_size));
296
+ serialize_isotree(*cpp_ptr, (char*)RAW(R_state));
297
+ UNPROTECT(1);
298
+ return R_state;
299
+ }
300
+ catch (const std::exception &ex) {
301
+ Rf_error("%s\n", ex.what());
302
+ }
303
+
304
+ return R_NilValue; /* <- won't be reached */
305
+ }
306
+
307
+ template <class Model>
308
+ SEXP deserialize_altrepped_pointer(SEXP cls, SEXP R_state)
309
+ {
310
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
311
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
312
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
313
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
314
+
315
+ try {
316
+ std::unique_ptr<Model> model(new Model());
317
+ const char *inp = (const char*)RAW(R_state);
318
+ deserialize_isotree(*model, inp);
319
+
320
+ R_SetExternalPtrAddr(R_ptr, model.get());
321
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
322
+ model.release();
323
+ }
324
+ catch (const std::exception &ex) {
325
+ Rf_error("%s\n", ex.what());
326
+ }
327
+
328
+ R_set_altrep_data1(out, R_ptr);
329
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
330
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
331
+
332
+ UNPROTECT(4);
333
+ return out;
334
+ }
335
+
336
+ template <class Model>
337
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
338
+ {
339
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
340
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
341
+ SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue));
342
+
343
+ if (!deep) {
344
+ R_set_altrep_data1(out, R_altrep_data1(altrepped_obj));
345
+ }
346
+
347
+ else {
348
+
349
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
350
+
351
+ try {
352
+ std::unique_ptr<Model> new_obj(new Model());
353
+ Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
354
+ *new_obj = *cpp_ptr;
355
+
356
+ R_SetExternalPtrAddr(R_ptr, new_obj.get());
357
+ R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE);
358
+ new_obj.release();
359
+ }
360
+
361
+ catch (const std::exception &ex) {
362
+ Rf_error("%s\n", ex.what());
363
+ }
364
+
365
+ R_set_altrep_data1(out, R_ptr);
366
+ UNPROTECT(1);
367
+ }
368
+
369
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
370
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_class);
371
+ UNPROTECT(3);
372
+ return out;
373
+ }
374
+
375
+ SEXP get_altrepped_null_pointer()
376
+ {
377
+ SEXP R_ptr_name = PROTECT(Rf_mkString("ptr"));
378
+ SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle"));
379
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
380
+ SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue));
381
+ Rf_setAttrib(out, R_NamesSymbol, R_ptr_name);
382
+ Rf_setAttrib(out, R_ClassSymbol, R_ptr_class);
383
+ UNPROTECT(4);
384
+ return out;
385
+ }
386
+
387
+ SEXP safe_get_altrepped_null_pointer(void *unused)
388
+ {
389
+ return get_altrepped_null_pointer();
390
+ }
391
+
392
+ SEXP serialize_altrepped_null(SEXP altrepped_obj)
393
+ {
394
+ return Rf_allocVector(RAWSXP, 0);
395
+ }
396
+
397
+ SEXP deserialize_altrepped_null(SEXP cls, SEXP R_state)
398
+ {
399
+ return get_altrepped_null_pointer();
400
+ }
401
+
402
+ SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep)
403
+ {
404
+ return get_altrepped_null_pointer();
405
+ }
406
+
407
+ Rboolean inspect_altrepped_pointer(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int))
408
+ {
409
+ Rcpp::Rcout << "Altrepped pointer [address:" << R_ExternalPtrAddr(R_altrep_data1(x)) << "]\n";
410
+ return TRUE;
411
+ }
412
+
413
+ template <class Model>
414
+ Model* get_pointer_from_altrep(SEXP altrepped_obj)
415
+ {
416
+ return (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj));
417
+ }
418
+
419
+ template <class Model>
420
+ Model* get_pointer_from_xptr(SEXP R_ptr)
421
+ {
422
+ return (Model*)R_ExternalPtrAddr(R_ptr);
423
+ }
424
+
425
+ // [[Rcpp::init]]
426
+ void init_altrepped_vectors(DllInfo* dll)
427
+ {
428
+ altrepped_pointer_IsoForest = R_make_altlist_class("altrepped_pointer_IsoForest", "isotree", dll);
429
+ R_set_altrep_Length_method(altrepped_pointer_IsoForest, altrepped_pointer_length);
430
+ R_set_altrep_Inspect_method(altrepped_pointer_IsoForest, inspect_altrepped_pointer);
431
+ R_set_altrep_Serialized_state_method(altrepped_pointer_IsoForest, serialize_altrepped_pointer<IsoForest>);
432
+ R_set_altrep_Unserialize_method(altrepped_pointer_IsoForest, deserialize_altrepped_pointer<IsoForest>);
433
+ R_set_altrep_Duplicate_method(altrepped_pointer_IsoForest, duplicate_altrepped_pointer<IsoForest>);
434
+ R_set_altlist_Elt_method(altrepped_pointer_IsoForest, get_element_from_altrepped_obj);
435
+
436
+ altrepped_pointer_ExtIsoForest = R_make_altlist_class("altrepped_pointer_ExtIsoForest", "isotree", dll);
437
+ R_set_altrep_Length_method(altrepped_pointer_ExtIsoForest, altrepped_pointer_length);
438
+ R_set_altrep_Inspect_method(altrepped_pointer_ExtIsoForest, inspect_altrepped_pointer);
439
+ R_set_altrep_Serialized_state_method(altrepped_pointer_ExtIsoForest, serialize_altrepped_pointer<ExtIsoForest>);
440
+ R_set_altrep_Unserialize_method(altrepped_pointer_ExtIsoForest, deserialize_altrepped_pointer<ExtIsoForest>);
441
+ R_set_altrep_Duplicate_method(altrepped_pointer_ExtIsoForest, duplicate_altrepped_pointer<ExtIsoForest>);
442
+ R_set_altlist_Elt_method(altrepped_pointer_ExtIsoForest, get_element_from_altrepped_obj);
443
+
444
+ altrepped_pointer_Imputer = R_make_altlist_class("altrepped_pointer_Imputer", "isotree", dll);
445
+ R_set_altrep_Length_method(altrepped_pointer_Imputer, altrepped_pointer_length);
446
+ R_set_altrep_Inspect_method(altrepped_pointer_Imputer, inspect_altrepped_pointer);
447
+ R_set_altrep_Serialized_state_method(altrepped_pointer_Imputer, serialize_altrepped_pointer<Imputer>);
448
+ R_set_altrep_Unserialize_method(altrepped_pointer_Imputer, deserialize_altrepped_pointer<Imputer>);
449
+ R_set_altrep_Duplicate_method(altrepped_pointer_Imputer, duplicate_altrepped_pointer<Imputer>);
450
+ R_set_altlist_Elt_method(altrepped_pointer_Imputer, get_element_from_altrepped_obj);
451
+
452
+ altrepped_pointer_TreesIndexer = R_make_altlist_class("altrepped_pointer_TreesIndexer", "isotree", dll);
453
+ R_set_altrep_Length_method(altrepped_pointer_TreesIndexer, altrepped_pointer_length);
454
+ R_set_altrep_Inspect_method(altrepped_pointer_TreesIndexer, inspect_altrepped_pointer);
455
+ R_set_altrep_Serialized_state_method(altrepped_pointer_TreesIndexer, serialize_altrepped_pointer<TreesIndexer>);
456
+ R_set_altrep_Unserialize_method(altrepped_pointer_TreesIndexer, deserialize_altrepped_pointer<TreesIndexer>);
457
+ R_set_altrep_Duplicate_method(altrepped_pointer_TreesIndexer, duplicate_altrepped_pointer<TreesIndexer>);
458
+ R_set_altlist_Elt_method(altrepped_pointer_TreesIndexer, get_element_from_altrepped_obj);
459
+
460
+ altrepped_pointer_NullPointer = R_make_altlist_class("altrepped_pointer_NullPointer", "isotree", dll);
461
+ R_set_altrep_Length_method(altrepped_pointer_NullPointer, altrepped_pointer_length);
462
+ R_set_altrep_Inspect_method(altrepped_pointer_NullPointer, inspect_altrepped_pointer);
463
+ R_set_altrep_Serialized_state_method(altrepped_pointer_NullPointer, serialize_altrepped_null);
464
+ R_set_altrep_Unserialize_method(altrepped_pointer_NullPointer, deserialize_altrepped_null);
465
+ R_set_altrep_Duplicate_method(altrepped_pointer_NullPointer, duplicate_altrepped_pointer);
466
+ R_set_altlist_Elt_method(altrepped_pointer_NullPointer, get_element_from_altrepped_obj);
467
+ }
468
+
123
469
  double* set_R_nan_as_C_nan(double *x, size_t n, std::vector<double> &v, int nthreads)
124
470
  {
125
471
  v.assign(x, x + n);
126
- #pragma omp parallel for schedule(static) num_threads(nthreads) shared(x, v, n)
127
- for (size_t_for i = 0; i < n; i++)
128
- if (isnan(v[i]))
129
- v[i] = NAN;
472
+ for (size_t i = 0; i < n; i++)
473
+ if (unlikely(std::isnan(v[i]))) v[i] = NAN;
130
474
  return v.data();
131
475
  }
132
476
 
477
+ double* set_R_nan_as_C_nan(double *x, size_t n, Rcpp::NumericVector &v, int nthreads)
478
+ {
479
+ v = Rcpp::NumericVector(x, x + n);
480
+ for (size_t i = 0; i < n; i++)
481
+ if (unlikely(std::isnan(v[i]))) v[i] = NAN;
482
+ return REAL(v);
483
+ }
484
+
133
485
  double* set_R_nan_as_C_nan(double *x, size_t n, int nthreads)
134
486
  {
135
- #pragma omp parallel for schedule(static) num_threads(nthreads) shared(x, n)
136
- for (size_t_for i = 0; i < n; i++)
137
- if (isnan(x[i]))
138
- x[i] = NAN;
487
+ for (size_t i = 0; i < n; i++)
488
+ if (unlikely(std::isnan(x[i]))) x[i] = NAN;
139
489
  return x;
140
490
  }
141
491
 
142
- // [[Rcpp::export]]
492
+ TreesIndexer* get_indexer_ptr_from_R_obj(SEXP indexer_R_ptr)
493
+ {
494
+ TreesIndexer *out = get_pointer_from_xptr<TreesIndexer>(indexer_R_ptr);
495
+ if (out && out->indices.empty()) out = nullptr;
496
+ return out;
497
+ }
498
+
499
+ // [[Rcpp::export(rng = false)]]
143
500
  Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat,
144
501
  Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
145
502
  Rcpp::NumericVector sample_weights, Rcpp::NumericVector col_weights,
146
503
  size_t nrows, size_t ncols_numeric, size_t ncols_categ, size_t ndim, size_t ntry,
147
504
  Rcpp::CharacterVector coef_type, bool coef_by_prop, bool with_replacement, bool weight_as_sample,
148
- size_t sample_size, size_t ntrees, size_t max_depth, bool limit_depth,
149
- bool penalize_range, bool calc_dist, bool standardize_dist, bool sq_dist,
505
+ size_t sample_size, size_t ntrees, size_t max_depth, size_t ncols_per_tree, bool limit_depth,
506
+ bool penalize_range, bool standardize_data,
507
+ Rcpp::CharacterVector scoring_metric, bool fast_bratio,
508
+ bool calc_dist, bool standardize_dist, bool sq_dist,
150
509
  bool calc_depth, bool standardize_depth, bool weigh_by_kurt,
151
- double prob_pick_by_gain_avg, double prob_split_by_gain_avg,
152
- double prob_pick_by_gain_pl, double prob_split_by_gain_pl, double min_gain,
510
+ double prob_pick_by_gain_pl, double prob_pick_by_gain_avg,
511
+ double prob_pick_by_full_gain, double prob_pick_by_dens,
512
+ double prob_pick_col_by_range, double prob_pick_col_by_var,
513
+ double prob_pick_col_by_kurt, double min_gain,
153
514
  Rcpp::CharacterVector cat_split_type, Rcpp::CharacterVector new_cat_action,
154
515
  Rcpp::CharacterVector missing_action, bool all_perm,
155
516
  bool build_imputer, bool output_imputations, size_t min_imp_obs,
156
517
  Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows,
157
- int random_seed, bool handle_interrupt, int nthreads)
518
+ int random_seed, bool use_long_double, int nthreads, bool lazy_serialization)
158
519
  {
159
520
  double* numeric_data_ptr = NULL;
160
521
  int* categ_data_ptr = NULL;
161
522
  int* ncat_ptr = NULL;
162
523
  double* Xc_ptr = NULL;
163
- sparse_ix* Xc_ind_ptr = NULL;
164
- sparse_ix* Xc_indptr_ptr = NULL;
524
+ int* Xc_ind_ptr = NULL;
525
+ int* Xc_indptr_ptr = NULL;
165
526
  double* sample_weights_ptr = NULL;
166
527
  double* col_weights_ptr = NULL;
167
- std::vector<double> Xcpp;
528
+ Rcpp::NumericVector Xcpp;
168
529
 
169
530
  if (X_num.size())
170
531
  {
171
- numeric_data_ptr = &X_num[0];
172
- if (Rcpp::as<std::string>(missing_action) != std::string("fail"))
532
+ numeric_data_ptr = REAL(X_num);
533
+ if (Rcpp::as<std::string>(missing_action) != "fail")
173
534
  numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, nrows * ncols_numeric, Xcpp, nthreads);
174
535
  }
175
536
 
176
537
  if (X_cat.size())
177
538
  {
178
- categ_data_ptr = &X_cat[0];
179
- ncat_ptr = &ncat[0];
539
+ categ_data_ptr = INTEGER(X_cat);
540
+ ncat_ptr = INTEGER(ncat);
180
541
  }
181
542
 
182
543
  if (Xc.size())
183
544
  {
184
- Xc_ptr = &Xc[0];
185
- Xc_ind_ptr = &Xc_ind[0];
186
- Xc_indptr_ptr = &Xc_indptr[0];
187
- if (Rcpp::as<std::string>(missing_action) != std::string("fail"))
545
+ Xc_ptr = REAL(Xc);
546
+ Xc_ind_ptr = INTEGER(Xc_ind);
547
+ Xc_indptr_ptr = INTEGER(Xc_indptr);
548
+ if (Rcpp::as<std::string>(missing_action) != "fail")
188
549
  Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, nthreads);
189
550
  }
190
551
 
191
552
  if (sample_weights.size())
192
553
  {
193
- sample_weights_ptr = &sample_weights[0];
554
+ sample_weights_ptr = REAL(sample_weights);
194
555
  }
195
556
 
196
557
  if (col_weights.size())
197
558
  {
198
- col_weights_ptr = &col_weights[0];
559
+ col_weights_ptr = REAL(col_weights);
199
560
  }
200
561
 
201
562
  CoefType coef_type_C = Normal;
@@ -204,47 +565,72 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
204
565
  MissingAction missing_action_C = Divide;
205
566
  UseDepthImp depth_imp_C = Higher;
206
567
  WeighImpRows weigh_imp_rows_C = Inverse;
568
+ ScoringMetric scoring_metric_C = Depth;
207
569
 
208
- if (Rcpp::as<std::string>(coef_type) == std::string("uniform"))
570
+ if (Rcpp::as<std::string>(coef_type) == "uniform")
209
571
  {
210
572
  coef_type_C = Uniform;
211
573
  }
212
- if (Rcpp::as<std::string>(cat_split_type) == std::string("single_categ"))
574
+ if (Rcpp::as<std::string>(cat_split_type) == "single_categ")
213
575
  {
214
576
  cat_split_type_C = SingleCateg;
215
577
  }
216
- if (Rcpp::as<std::string>(new_cat_action) == std::string("smallest"))
578
+ if (Rcpp::as<std::string>(new_cat_action) == "smallest")
217
579
  {
218
580
  new_cat_action_C = Smallest;
219
581
  }
220
- else if (Rcpp::as<std::string>(new_cat_action) == std::string("random"))
582
+ else if (Rcpp::as<std::string>(new_cat_action) == "random")
221
583
  {
222
584
  new_cat_action_C = Random;
223
585
  }
224
- if (Rcpp::as<std::string>(missing_action) == std::string("impute"))
586
+ if (Rcpp::as<std::string>(missing_action) == "impute")
225
587
  {
226
588
  missing_action_C = Impute;
227
589
  }
228
- else if (Rcpp::as<std::string>(missing_action) == std::string("fail"))
590
+ else if (Rcpp::as<std::string>(missing_action) == "fail")
229
591
  {
230
592
  missing_action_C = Fail;
231
593
  }
232
- if (Rcpp::as<std::string>(depth_imp) == std::string("lower"))
594
+ if (Rcpp::as<std::string>(depth_imp) == "lower")
233
595
  {
234
596
  depth_imp_C = Lower;
235
597
  }
236
- else if (Rcpp::as<std::string>(depth_imp) == std::string("same"))
598
+ else if (Rcpp::as<std::string>(depth_imp) == "same")
237
599
  {
238
600
  depth_imp_C = Same;
239
601
  }
240
- if (Rcpp::as<std::string>(weigh_imp_rows) == std::string("prop"))
602
+ if (Rcpp::as<std::string>(weigh_imp_rows) == "prop")
241
603
  {
242
604
  weigh_imp_rows_C = Prop;
243
605
  }
244
- else if (Rcpp::as<std::string>(weigh_imp_rows) == std::string("flat"))
606
+ else if (Rcpp::as<std::string>(weigh_imp_rows) == "flat")
245
607
  {
246
608
  weigh_imp_rows_C = Flat;
247
609
  }
610
+ if (Rcpp::as<std::string>(scoring_metric) == "adj_depth")
611
+ {
612
+ scoring_metric_C = AdjDepth;
613
+ }
614
+ else if (Rcpp::as<std::string>(scoring_metric) == "density")
615
+ {
616
+ scoring_metric_C = Density;
617
+ }
618
+ else if (Rcpp::as<std::string>(scoring_metric) == "adj_density")
619
+ {
620
+ scoring_metric_C = AdjDensity;
621
+ }
622
+ else if (Rcpp::as<std::string>(scoring_metric) == "boxed_density")
623
+ {
624
+ scoring_metric_C = BoxedDensity;
625
+ }
626
+ else if (Rcpp::as<std::string>(scoring_metric) == "boxed_density2")
627
+ {
628
+ scoring_metric_C = BoxedDensity2;
629
+ }
630
+ else if (Rcpp::as<std::string>(scoring_metric) == "boxed_ratio")
631
+ {
632
+ scoring_metric_C = BoxedRatio;
633
+ }
248
634
 
249
635
  Rcpp::NumericVector tmat = Rcpp::NumericVector();
250
636
  Rcpp::NumericMatrix dmat = Rcpp::NumericMatrix();
@@ -255,24 +641,56 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
255
641
 
256
642
  if (calc_dist)
257
643
  {
258
- tmat = Rcpp::NumericVector((nrows * (nrows - 1)) / 2);
259
- tmat_ptr = &tmat[0];
644
+ tmat = Rcpp::NumericVector(calc_ncomb(nrows));
645
+ tmat_ptr = REAL(tmat);
260
646
  if (sq_dist)
261
647
  {
262
- dmat = Rcpp::NumericMatrix(nrows);
263
- dmat_ptr = &dmat(0, 0);
648
+ dmat = Rcpp::NumericMatrix(nrows, nrows);
649
+ dmat_ptr = REAL(dmat);
264
650
  }
265
651
  }
266
652
 
267
653
  if (calc_depth)
268
654
  {
269
655
  depths = Rcpp::NumericVector(nrows);
270
- depths_ptr = &depths[0];
656
+ depths_ptr = REAL(depths);
657
+ }
658
+
659
+ Rcpp::List outp = Rcpp::List::create(
660
+ Rcpp::_["depths"] = depths,
661
+ Rcpp::_["tmat"] = tmat,
662
+ Rcpp::_["dmat"] = dmat,
663
+ Rcpp::_["model"] = R_NilValue,
664
+ Rcpp::_["imputer"] = R_NilValue,
665
+ Rcpp::_["indexer"] = R_NilValue,
666
+ Rcpp::_["imputed_num"] = R_NilValue,
667
+ Rcpp::_["imputed_cat"] = R_NilValue,
668
+ Rcpp::_["err"] = Rcpp::LogicalVector::create(1)
669
+ );
670
+
671
+ Rcpp::List model_lst_nonlazy = Rcpp::List::create(
672
+ Rcpp::_["ptr"] = R_NilValue,
673
+ Rcpp::_["ser"] = R_NilValue
674
+ );
675
+
676
+ Rcpp::List imputer_lst_nonlazy = Rcpp::List::create(
677
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
678
+ Rcpp::_["ser"] = R_NilValue
679
+ );
680
+
681
+ if (lazy_serialization) {
682
+ outp["indexer"] = get_altrepped_null_pointer();
683
+ }
684
+ else {
685
+ outp["indexer"] = Rcpp::List::create(
686
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
687
+ Rcpp::_["ser"] = R_NilValue
688
+ );
271
689
  }
272
690
 
273
- std::unique_ptr<IsoForest> model_ptr = std::unique_ptr<IsoForest>();
274
- std::unique_ptr<ExtIsoForest> ext_model_ptr = std::unique_ptr<ExtIsoForest>();
275
- std::unique_ptr<Imputer> imputer_ptr = std::unique_ptr<Imputer>();
691
+ std::unique_ptr<IsoForest> model_ptr(nullptr);
692
+ std::unique_ptr<ExtIsoForest> ext_model_ptr(nullptr);
693
+ std::unique_ptr<Imputer> imputer_ptr(nullptr);
276
694
 
277
695
  if (ndim == 1)
278
696
  model_ptr = std::unique_ptr<IsoForest>(new IsoForest());
@@ -289,141 +707,211 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
289
707
  Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
290
708
  ndim, ntry, coef_type_C, coef_by_prop,
291
709
  sample_weights_ptr, with_replacement, weight_as_sample,
292
- nrows, sample_size, ntrees, max_depth,
293
- limit_depth, penalize_range,
710
+ nrows, sample_size, ntrees, max_depth, ncols_per_tree,
711
+ limit_depth, penalize_range, standardize_data,
712
+ scoring_metric_C, fast_bratio,
294
713
  standardize_dist, tmat_ptr,
295
714
  depths_ptr, standardize_depth,
296
715
  col_weights_ptr, weigh_by_kurt,
297
- prob_pick_by_gain_avg, prob_split_by_gain_avg,
298
- prob_pick_by_gain_pl, prob_split_by_gain_pl,
716
+ prob_pick_by_gain_pl, prob_pick_by_gain_avg,
717
+ prob_pick_by_full_gain, prob_pick_by_dens,
718
+ prob_pick_col_by_range, prob_pick_col_by_var,
719
+ prob_pick_col_by_kurt,
299
720
  min_gain, missing_action_C,
300
721
  cat_split_type_C, new_cat_action_C,
301
722
  all_perm, imputer_ptr.get(), min_imp_obs,
302
723
  depth_imp_C, weigh_imp_rows_C, output_imputations,
303
- (uint64_t) random_seed, handle_interrupt, nthreads);
304
-
724
+ (uint64_t) random_seed, use_long_double, nthreads);
725
+
726
+ Rcpp::checkUserInterrupt(); /* <- nothing is returned in this case */
727
+ /* Note to self: the procedure has its own interrupt checker, so when an interrupt
728
+ signal is triggered, first it will print a message about it, then re-issue the
729
+ signal, then check for interrupt through Rcpp's, which will return nothing to
730
+ the outside and will not raise any error. In this case, at least the user will
731
+ see the error message. Note that Rcpp's interrupt non-return, unlike R's, triggers
732
+ stack unwinding for C++ objects. */
733
+
734
+ /* Note to self: since the function for fitting the model uses the C++ exception system,
735
+ and the stop signals are translated into Rcpp stops, this section below should not
736
+ be reachable anyhow. */
305
737
  if (ret_val == EXIT_FAILURE)
306
738
  {
307
- return Rcpp::List::create(Rcpp::_["err"] = Rcpp::LogicalVector::create(1));
739
+ Rcpp::Rcerr << "Unexpected error" << std::endl;
740
+ return Rcpp::unwindProtect(safe_errlist, nullptr);
308
741
  }
309
742
 
310
743
  if (calc_dist && sq_dist)
311
- tmat_to_dense(tmat_ptr, dmat_ptr, nrows, !standardize_dist);
744
+ tmat_to_dense(tmat_ptr, dmat_ptr, nrows, standardize_dist? 0. : std::numeric_limits<double>::infinity());
312
745
 
313
746
  bool serialization_failed = false;
314
- Rcpp::RawVector serialized_obj;
315
- if (ndim == 1)
316
- serialized_obj = serialize_cpp_obj(model_ptr.get());
317
- else
318
- serialized_obj = serialize_cpp_obj(ext_model_ptr.get());
319
- if (!serialized_obj.size()) serialization_failed = true;
320
- if (serialization_failed) {
321
- if (ndim == 1)
322
- model_ptr.reset();
323
- else
324
- ext_model_ptr.reset();
325
- }
326
747
 
327
- Rcpp::List outp = Rcpp::List::create(
328
- Rcpp::_["serialized_obj"] = serialized_obj,
329
- Rcpp::_["depths"] = depths,
330
- Rcpp::_["tmat"] = tmat,
331
- Rcpp::_["dmat"] = dmat
332
- );
748
+ if (lazy_serialization)
749
+ {
750
+ if (ndim == 1) {
751
+ outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&model_ptr);
752
+ }
753
+ else {
754
+ outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&ext_model_ptr);
755
+ }
333
756
 
334
- if (!serialization_failed)
757
+ if (build_imputer) {
758
+ outp["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&imputer_ptr);
759
+ }
760
+ else {
761
+ outp["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
762
+ }
763
+ }
764
+
765
+ else
335
766
  {
767
+ Rcpp::RawVector serialized_obj;
768
+ /* Note to self: the serialization functions use unwind protection internally. */
336
769
  if (ndim == 1)
337
- outp["model_ptr"] = Rcpp::XPtr<IsoForest>(model_ptr.release(), true);
770
+ serialized_obj = serialize_cpp_obj(model_ptr.get());
338
771
  else
339
- outp["model_ptr"] = Rcpp::XPtr<ExtIsoForest>(ext_model_ptr.release(), true);
340
- } else
341
- outp["model_ptr"] = R_NilValue;
772
+ serialized_obj = serialize_cpp_obj(ext_model_ptr.get());
342
773
 
343
- if (build_imputer && !serialization_failed)
344
- {
345
- outp["imputer_ser"] = serialize_cpp_obj(imputer_ptr.get());
346
- if (!Rf_xlength(outp["imputer_ser"]))
774
+ if (unlikely(!serialized_obj.size())) serialization_failed = true;
775
+ if (unlikely(serialization_failed)) {
776
+ throw Rcpp::exception("Error: insufficient memory\n");
777
+ }
778
+
779
+ model_lst_nonlazy["ser"] = serialized_obj;
780
+ if (ndim == 1) {
781
+ model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get());
782
+ model_ptr.release();
783
+ }
784
+ else {
785
+ model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get());
786
+ ext_model_ptr.release();
787
+ }
788
+
789
+ outp["model"] = model_lst_nonlazy;
790
+
791
+ if (build_imputer)
347
792
  {
348
- serialization_failed = true;
349
- imputer_ptr.reset();
350
- if (ndim == 1)
351
- model_ptr.reset();
352
- else
353
- ext_model_ptr.reset();
354
- outp["imputer_ptr"] = R_NilValue;
355
- outp["model_ptr"] = R_NilValue;
356
- } else
357
- outp["imputer_ptr"] = Rcpp::XPtr<Imputer>(imputer_ptr.release(), true);
793
+ imputer_lst_nonlazy["ser"] = serialize_cpp_obj(imputer_ptr.get());
794
+ if (!Rf_xlength(imputer_lst_nonlazy["ser"]))
795
+ {
796
+ throw Rcpp::exception("Error: insufficient memory\n");
797
+ }
798
+
799
+ imputer_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get());
800
+ imputer_ptr.release();
801
+ }
802
+
803
+ outp["imputer"] = imputer_lst_nonlazy;
358
804
  }
359
805
 
360
- if (output_imputations && !serialization_failed)
806
+ if (output_imputations)
361
807
  {
362
- outp["imputed_num"] = Rcpp::NumericVector(Xcpp.begin(), Xcpp.end());
808
+ outp["imputed_num"] = Xcpp;
363
809
  outp["imputed_cat"] = X_cat;
364
810
  }
365
811
 
366
- outp["err"] = Rcpp::LogicalVector::create(0);
367
-
812
+ outp["err"] = Rcpp::unwindProtect(safe_FALSE, nullptr);
368
813
  return outp;
369
814
  }
370
815
 
371
- // [[Rcpp::export]]
372
- Rcpp::RawVector fit_tree(SEXP model_R_ptr,
373
- Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat,
374
- Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
375
- Rcpp::NumericVector sample_weights, Rcpp::NumericVector col_weights,
376
- size_t nrows, size_t ncols_numeric, size_t ncols_categ,
377
- size_t ndim, size_t ntry, Rcpp::CharacterVector coef_type, bool coef_by_prop,
378
- size_t max_depth, bool limit_depth, bool penalize_range,
379
- bool weigh_by_kurt,
380
- double prob_pick_by_gain_avg, double prob_split_by_gain_avg,
381
- double prob_pick_by_gain_pl, double prob_split_by_gain_pl, double min_gain,
382
- Rcpp::CharacterVector cat_split_type, Rcpp::CharacterVector new_cat_action,
383
- Rcpp::CharacterVector missing_action, bool build_imputer, size_t min_imp_obs, SEXP imp_R_ptr,
384
- Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows,
385
- bool all_perm, uint64_t random_seed)
816
+ // [[Rcpp::export(rng = false)]]
817
+ void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector serialized_imputer,
818
+ SEXP indexer_R_ptr, Rcpp::RawVector serialized_indexer,
819
+ Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat,
820
+ Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
821
+ Rcpp::NumericVector sample_weights, Rcpp::NumericVector col_weights,
822
+ size_t nrows, size_t ncols_numeric, size_t ncols_categ,
823
+ size_t ndim, size_t ntry, Rcpp::CharacterVector coef_type, bool coef_by_prop,
824
+ size_t max_depth, size_t ncols_per_tree, bool limit_depth, bool penalize_range,
825
+ bool standardize_data, bool fast_bratio, bool weigh_by_kurt,
826
+ double prob_pick_by_gain_pl, double prob_pick_by_gain_avg,
827
+ double prob_pick_by_full_gain, double prob_pick_by_dens,
828
+ double prob_pick_col_by_range, double prob_pick_col_by_var,
829
+ double prob_pick_col_by_kurt, double min_gain,
830
+ Rcpp::CharacterVector cat_split_type, Rcpp::CharacterVector new_cat_action,
831
+ Rcpp::CharacterVector missing_action, bool build_imputer, size_t min_imp_obs, SEXP imp_R_ptr,
832
+ Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows,
833
+ bool all_perm,
834
+ Rcpp::NumericVector ref_X_num, Rcpp::IntegerVector ref_X_cat,
835
+ Rcpp::NumericVector ref_Xc, Rcpp::IntegerVector ref_Xc_ind, Rcpp::IntegerVector ref_Xc_indptr,
836
+ uint64_t random_seed, bool use_long_double,
837
+ Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update,
838
+ bool is_altrepped)
386
839
  {
840
+ Rcpp::List out = Rcpp::List::create(
841
+ Rcpp::_["model_ser"] = R_NilValue,
842
+ Rcpp::_["imputer_ser"] = R_NilValue,
843
+ Rcpp::_["indexer_ser"] = R_NilValue
844
+ );
845
+
846
+ Rcpp::IntegerVector ntrees_plus1 = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]) + 1);
847
+
387
848
  double* numeric_data_ptr = NULL;
388
849
  int* categ_data_ptr = NULL;
389
850
  int* ncat_ptr = NULL;
390
851
  double* Xc_ptr = NULL;
391
- sparse_ix* Xc_ind_ptr = NULL;
392
- sparse_ix* Xc_indptr_ptr = NULL;
852
+ int* Xc_ind_ptr = NULL;
853
+ int* Xc_indptr_ptr = NULL;
393
854
  double* sample_weights_ptr = NULL;
394
855
  double* col_weights_ptr = NULL;
395
- std::vector<double> Xcpp;
856
+ Rcpp::NumericVector Xcpp;
396
857
 
397
858
  if (X_num.size())
398
859
  {
399
- numeric_data_ptr = &X_num[0];
400
- if (Rcpp::as<std::string>(missing_action) != std::string("fail"))
860
+ numeric_data_ptr = REAL(X_num);
861
+ if (Rcpp::as<std::string>(missing_action) != "fail")
401
862
  numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, nrows * ncols_numeric, Xcpp, 1);
402
863
  }
403
864
 
404
865
  if (X_cat.size())
405
866
  {
406
- categ_data_ptr = &X_cat[0];
407
- ncat_ptr = &ncat[0];
867
+ categ_data_ptr = INTEGER(X_cat);
868
+ ncat_ptr = INTEGER(ncat);
408
869
  }
409
870
 
410
871
  if (Xc.size())
411
872
  {
412
- Xc_ptr = &Xc[0];
413
- Xc_ind_ptr = &Xc_ind[0];
414
- Xc_indptr_ptr = &Xc_indptr[0];
415
- if (Rcpp::as<std::string>(missing_action) != std::string("fail"))
873
+ Xc_ptr = REAL(Xc);
874
+ Xc_ind_ptr = INTEGER(Xc_ind);
875
+ Xc_indptr_ptr = INTEGER(Xc_indptr);
876
+ if (Rcpp::as<std::string>(missing_action) != "fail")
416
877
  Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, 1);
417
878
  }
418
879
 
880
+ double* ref_numeric_data_ptr = NULL;
881
+ int* ref_categ_data_ptr = NULL;
882
+ double* ref_Xc_ptr = NULL;
883
+ int* ref_Xc_ind_ptr = NULL;
884
+ int* ref_Xc_indptr_ptr = NULL;
885
+ Rcpp::NumericVector ref_Xcpp;
886
+ if (ref_X_num.size())
887
+ {
888
+ ref_numeric_data_ptr = REAL(ref_X_num);
889
+ if (Rcpp::as<std::string>(missing_action) != "fail")
890
+ ref_numeric_data_ptr = set_R_nan_as_C_nan(ref_numeric_data_ptr, ref_X_num.size(), ref_Xcpp, 1);
891
+ }
892
+
893
+ if (ref_X_cat.size())
894
+ {
895
+ ref_categ_data_ptr = INTEGER(ref_X_cat);
896
+ }
897
+
898
+ if (ref_Xc.size())
899
+ {
900
+ ref_Xc_ptr = REAL(ref_Xc);
901
+ ref_Xc_ind_ptr = INTEGER(ref_Xc_ind);
902
+ ref_Xc_indptr_ptr = INTEGER(ref_Xc_indptr);
903
+ if (Rcpp::as<std::string>(missing_action) != "fail")
904
+ ref_Xc_ptr = set_R_nan_as_C_nan(ref_Xc_ptr, ref_Xc.size(), ref_Xcpp, 1);
905
+ }
906
+
419
907
  if (sample_weights.size())
420
908
  {
421
- sample_weights_ptr = &sample_weights[0];
909
+ sample_weights_ptr = REAL(sample_weights);
422
910
  }
423
911
 
424
912
  if (col_weights.size())
425
913
  {
426
- col_weights_ptr = &col_weights[0];
914
+ col_weights_ptr = REAL(col_weights);
427
915
  }
428
916
 
429
917
  CoefType coef_type_C = Normal;
@@ -433,62 +921,63 @@ Rcpp::RawVector fit_tree(SEXP model_R_ptr,
433
921
  UseDepthImp depth_imp_C = Higher;
434
922
  WeighImpRows weigh_imp_rows_C = Inverse;
435
923
 
436
- if (Rcpp::as<std::string>(coef_type) == std::string("uniform"))
924
+ if (Rcpp::as<std::string>(coef_type) == "uniform")
437
925
  {
438
926
  coef_type_C = Uniform;
439
927
  }
440
- if (Rcpp::as<std::string>(cat_split_type) == std::string("single_categ"))
928
+ if (Rcpp::as<std::string>(cat_split_type) == "single_categ")
441
929
  {
442
930
  cat_split_type_C = SingleCateg;
443
931
  }
444
- if (Rcpp::as<std::string>(new_cat_action) == std::string("smallest"))
932
+ if (Rcpp::as<std::string>(new_cat_action) == "smallest")
445
933
  {
446
934
  new_cat_action_C = Smallest;
447
935
  }
448
- else if (Rcpp::as<std::string>(new_cat_action) == std::string("random"))
936
+ else if (Rcpp::as<std::string>(new_cat_action) == "random")
449
937
  {
450
938
  new_cat_action_C = Random;
451
939
  }
452
- if (Rcpp::as<std::string>(missing_action) == std::string("impute"))
940
+ if (Rcpp::as<std::string>(missing_action) == "impute")
453
941
  {
454
942
  missing_action_C = Impute;
455
943
  }
456
- else if (Rcpp::as<std::string>(missing_action) == std::string("fail"))
944
+ else if (Rcpp::as<std::string>(missing_action) == "fail")
457
945
  {
458
946
  missing_action_C = Fail;
459
947
  }
460
- if (Rcpp::as<std::string>(depth_imp) == std::string("lower"))
948
+ if (Rcpp::as<std::string>(depth_imp) == "lower")
461
949
  {
462
950
  depth_imp_C = Lower;
463
951
  }
464
- else if (Rcpp::as<std::string>(depth_imp) == std::string("same"))
952
+ else if (Rcpp::as<std::string>(depth_imp) == "same")
465
953
  {
466
954
  depth_imp_C = Same;
467
955
  }
468
- if (Rcpp::as<std::string>(weigh_imp_rows) == std::string("prop"))
956
+ if (Rcpp::as<std::string>(weigh_imp_rows) == "prop")
469
957
  {
470
958
  weigh_imp_rows_C = Prop;
471
959
  }
472
- else if (Rcpp::as<std::string>(weigh_imp_rows) == std::string("flat"))
960
+ else if (Rcpp::as<std::string>(weigh_imp_rows) == "flat")
473
961
  {
474
962
  weigh_imp_rows_C = Flat;
475
963
  }
964
+
476
965
 
477
966
  IsoForest* model_ptr = NULL;
478
967
  ExtIsoForest* ext_model_ptr = NULL;
479
- Imputer* imputer_ptr = NULL;
968
+ Imputer* imputer_ptr = NULL;
969
+ TreesIndexer* indexer_ptr = NULL;
480
970
  if (ndim == 1)
481
971
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
482
972
  else
483
973
  ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
484
974
 
485
- std::vector<ImputeNode> *imp_ptr = NULL;
486
975
  if (build_imputer)
487
- {
488
976
  imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
489
- imputer_ptr->imputer_tree.emplace_back();
490
- imp_ptr = &imputer_ptr->imputer_tree.back();
491
- }
977
+
978
+ indexer_ptr = get_indexer_ptr_from_R_obj(indexer_R_ptr);
979
+
980
+ size_t old_ntrees = (ndim == 1)? (model_ptr->trees.size()) : (ext_model_ptr->hplanes.size());
492
981
 
493
982
  add_tree(model_ptr, ext_model_ptr,
494
983
  numeric_data_ptr, ncols_numeric,
@@ -496,24 +985,172 @@ Rcpp::RawVector fit_tree(SEXP model_R_ptr,
496
985
  Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
497
986
  ndim, ntry, coef_type_C, coef_by_prop,
498
987
  sample_weights_ptr,
499
- nrows, max_depth,
500
- limit_depth, penalize_range,
988
+ nrows, max_depth, ncols_per_tree,
989
+ limit_depth, penalize_range, standardize_data, fast_bratio,
501
990
  col_weights_ptr, weigh_by_kurt,
502
- prob_pick_by_gain_avg, prob_split_by_gain_avg,
503
- prob_pick_by_gain_pl, prob_split_by_gain_pl,
991
+ prob_pick_by_gain_pl, prob_pick_by_gain_avg,
992
+ prob_pick_by_full_gain, prob_pick_by_dens,
993
+ prob_pick_col_by_range, prob_pick_col_by_var,
994
+ prob_pick_col_by_kurt,
504
995
  min_gain, missing_action_C,
505
996
  cat_split_type_C, new_cat_action_C,
506
997
  depth_imp_C, weigh_imp_rows_C, all_perm,
507
- imp_ptr, min_imp_obs, (uint64_t)random_seed);
998
+ imputer_ptr, min_imp_obs,
999
+ indexer_ptr,
1000
+ ref_numeric_data_ptr, ref_categ_data_ptr,
1001
+ true, (size_t)0, (size_t)0,
1002
+ ref_Xc_ptr, ref_Xc_ind_ptr, ref_Xc_indptr_ptr,
1003
+ (uint64_t)random_seed, use_long_double);
1004
+
1005
+ Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
1006
+ size_t new_size;
1007
+
1008
+ if (is_altrepped) goto dont_serialize;
1009
+
1010
+ try
1011
+ {
1012
+ if (ndim == 1)
1013
+ {
1014
+ if (serialized_obj.size() &&
1015
+ check_can_undergo_incremental_serialization(*model_ptr, (char*)RAW(serialized_obj)))
1016
+ {
1017
+ try {
1018
+ new_size = serialized_obj.size()
1019
+ + determine_serialized_size_additional_trees(*model_ptr, old_ntrees);
1020
+ new_serialized = resize_vec(serialized_obj, new_size);
1021
+ char *temp = (char*)RAW(new_serialized);
1022
+ incremental_serialize_isotree(*model_ptr, temp);
1023
+ out["model_ser"] = new_serialized;
1024
+ }
1025
+
1026
+ catch (std::runtime_error &e) {
1027
+ goto serialize_anew_singlevar;
1028
+ }
1029
+ }
1030
+
1031
+ else {
1032
+ serialize_anew_singlevar:
1033
+ out["model_ser"] = serialize_cpp_obj(model_ptr);
1034
+ }
1035
+ }
508
1036
 
509
- if (ndim == 1)
510
- return serialize_cpp_obj(model_ptr);
511
- else
512
- return serialize_cpp_obj(ext_model_ptr);
1037
+ else
1038
+ {
1039
+ if (serialized_obj.size() &&
1040
+ check_can_undergo_incremental_serialization(*ext_model_ptr, (char*)RAW(serialized_obj)))
1041
+ {
1042
+ try {
1043
+ new_size = serialized_obj.size()
1044
+ + determine_serialized_size_additional_trees(*ext_model_ptr, old_ntrees);
1045
+ new_serialized = resize_vec(serialized_obj, new_size);
1046
+ char *temp = (char*)RAW(new_serialized);
1047
+ incremental_serialize_isotree(*ext_model_ptr, temp);
1048
+ out["model_ser"] = new_serialized;
1049
+ }
1050
+
1051
+ catch (std::runtime_error &e) {
1052
+ goto serialize_anew_ext;
1053
+ }
1054
+ }
1055
+
1056
+ else {
1057
+ serialize_anew_ext:
1058
+ out["model_ser"] = serialize_cpp_obj(ext_model_ptr);
1059
+ }
1060
+ }
1061
+
1062
+ if (imputer_ptr != NULL)
1063
+ {
1064
+ if (serialized_imputer.size() &&
1065
+ check_can_undergo_incremental_serialization(*imputer_ptr, (char*)RAW(serialized_imputer)))
1066
+ {
1067
+ try {
1068
+ new_size = serialized_imputer.size()
1069
+ + determine_serialized_size_additional_trees(*imputer_ptr, old_ntrees);
1070
+ new_imp_serialized = resize_vec(serialized_imputer, new_size);
1071
+ char *temp = (char*)RAW(new_imp_serialized);
1072
+ incremental_serialize_isotree(*imputer_ptr, temp);
1073
+ out["imputer_ser"] = new_imp_serialized;
1074
+ }
1075
+
1076
+ catch (std::runtime_error &e) {
1077
+ goto serialize_anew_imp;
1078
+ }
1079
+ }
1080
+
1081
+ else {
1082
+ serialize_anew_imp:
1083
+ out["imputer_ser"] = serialize_cpp_obj(imputer_ptr);
1084
+ }
1085
+ }
1086
+
1087
+ if (indexer_ptr != NULL)
1088
+ {
1089
+ if (serialized_indexer.size() &&
1090
+ check_can_undergo_incremental_serialization(*indexer_ptr, (char*)RAW(serialized_indexer)))
1091
+ {
1092
+ try {
1093
+ new_size = serialized_indexer.size()
1094
+ + determine_serialized_size_additional_trees(*indexer_ptr, old_ntrees);
1095
+ new_ind_serialized = resize_vec(serialized_indexer, new_size);
1096
+ char *temp = (char*)RAW(new_ind_serialized);
1097
+ incremental_serialize_isotree(*indexer_ptr, temp);
1098
+ out["indexer_ser"] = new_ind_serialized;
1099
+ }
1100
+
1101
+ catch (std::runtime_error &e) {
1102
+ goto serialize_anew_ind;
1103
+ }
1104
+ }
1105
+
1106
+ else {
1107
+ serialize_anew_ind:
1108
+ out["indexer_ser"] = serialize_cpp_obj(indexer_ptr);
1109
+ }
1110
+ }
1111
+ }
1112
+
1113
+ catch (...)
1114
+ {
1115
+ if (ndim == 1)
1116
+ model_ptr->trees.resize(old_ntrees);
1117
+ else
1118
+ ext_model_ptr->hplanes.resize(old_ntrees);
1119
+ if (build_imputer)
1120
+ imputer_ptr->imputer_tree.resize(old_ntrees);
1121
+ if (indexer_ptr != NULL)
1122
+ indexer_ptr->indices.resize(old_ntrees);
1123
+ throw;
1124
+ }
1125
+
1126
+ {
1127
+ Rcpp::List model_lst = model_cpp_obj_update["model"];
1128
+ model_lst["ser"] = out["model_ser"];
1129
+ model_cpp_obj_update["model"] = model_lst;
1130
+
1131
+ if (build_imputer)
1132
+ {
1133
+ Rcpp::List imputer_lst = model_cpp_obj_update["imputer"];
1134
+ imputer_lst["ser"] = out["imputer_ser"];
1135
+ model_cpp_obj_update["imputer"] = imputer_lst;
1136
+ }
1137
+
1138
+ if (indexer_ptr)
1139
+ {
1140
+ Rcpp::List indexer_lst = model_cpp_obj_update["indexer"];
1141
+ indexer_lst["ser"] = out["indexer_ser"];
1142
+ model_cpp_obj_update["indexer"] = indexer_lst;
1143
+ }
1144
+ }
1145
+
1146
+ dont_serialize:
1147
+ model_params_update["ntrees"] = ntrees_plus1;
513
1148
  }
514
1149
 
515
- // [[Rcpp::export]]
516
- void predict_iso(SEXP model_R_ptr, Rcpp::NumericVector outp, Rcpp::IntegerVector tree_num, bool is_extended,
1150
+ // [[Rcpp::export(rng = false)]]
1151
+ void predict_iso(SEXP model_R_ptr, bool is_extended,
1152
+ SEXP indexer_R_ptr,
1153
+ Rcpp::NumericVector outp, Rcpp::IntegerMatrix tree_num, Rcpp::NumericMatrix tree_depths,
517
1154
  Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
518
1155
  Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
519
1156
  Rcpp::NumericVector Xr, Rcpp::IntegerVector Xr_ind, Rcpp::IntegerVector Xr_indptr,
@@ -522,55 +1159,48 @@ void predict_iso(SEXP model_R_ptr, Rcpp::NumericVector outp, Rcpp::IntegerVector
522
1159
  double* numeric_data_ptr = NULL;
523
1160
  int* categ_data_ptr = NULL;
524
1161
  double* Xc_ptr = NULL;
525
- sparse_ix* Xc_ind_ptr = NULL;
526
- sparse_ix* Xc_indptr_ptr = NULL;
1162
+ int* Xc_ind_ptr = NULL;
1163
+ int* Xc_indptr_ptr = NULL;
527
1164
  double* Xr_ptr = NULL;
528
- sparse_ix* Xr_ind_ptr = NULL;
529
- sparse_ix* Xr_indptr_ptr = NULL;
530
- sparse_ix* tree_num_ptr = NULL;
531
- std::vector<double> Xcpp;
1165
+ int* Xr_ind_ptr = NULL;
1166
+ int* Xr_indptr_ptr = NULL;
1167
+ Rcpp::NumericVector Xcpp;
532
1168
 
533
1169
  if (X_num.size())
534
1170
  {
535
- numeric_data_ptr = &X_num[0];
1171
+ numeric_data_ptr = REAL(X_num);
536
1172
  }
537
1173
 
538
1174
  if (X_cat.size())
539
1175
  {
540
- categ_data_ptr = &X_cat[0];
1176
+ categ_data_ptr = INTEGER(X_cat);
541
1177
  }
542
1178
 
543
1179
  if (Xc_indptr.size())
544
1180
  {
545
- if (Xc.size())
546
- Xc_ptr = &Xc[0];
547
- if (Xc_ind.size())
548
- Xc_ind_ptr = &Xc_ind[0];
549
- Xc_indptr_ptr = &Xc_indptr[0];
1181
+ Xc_ptr = REAL(Xc);
1182
+ Xc_ind_ptr = INTEGER(Xc_ind);
1183
+ Xc_indptr_ptr = INTEGER(Xc_indptr);
550
1184
  }
551
1185
 
552
1186
  if (Xr_indptr.size())
553
1187
  {
554
- if (Xr.size())
555
- Xr_ptr = &Xr[0];
556
- if (Xr_ind.size())
557
- Xr_ind_ptr = &Xr_ind[0];
558
- Xr_indptr_ptr = &Xr_indptr[0];
559
- }
560
-
561
- if (tree_num.size())
562
- {
563
- tree_num_ptr = &tree_num[0];
1188
+ Xr_ptr = REAL(Xr);
1189
+ Xr_ind_ptr = INTEGER(Xr_ind);
1190
+ Xr_indptr_ptr = INTEGER(Xr_indptr);
564
1191
  }
565
1192
 
566
- double* depths_ptr = &outp[0];
1193
+ double *depths_ptr = REAL(outp);
1194
+ double *tree_depths_ptr = tree_depths.size()? REAL(tree_depths) : NULL;
1195
+ int *tree_num_ptr = tree_num.size()? INTEGER(tree_num) : NULL;
567
1196
 
568
1197
  IsoForest* model_ptr = NULL;
569
1198
  ExtIsoForest* ext_model_ptr = NULL;
570
1199
  if (is_extended)
571
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1200
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
572
1201
  else
573
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1202
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1203
+ TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr);
574
1204
 
575
1205
  MissingAction missing_action = is_extended?
576
1206
  ext_model_ptr->missing_action
@@ -583,58 +1213,71 @@ void predict_iso(SEXP model_R_ptr, Rcpp::NumericVector outp, Rcpp::IntegerVector
583
1213
  if (Xr.size()) Xr_ptr = set_R_nan_as_C_nan(Xr_ptr, Xr.size(), Xcpp, nthreads);
584
1214
  }
585
1215
 
586
- predict_iforest(numeric_data_ptr, categ_data_ptr,
587
- Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
588
- Xr_ptr, Xr_ind_ptr, Xr_indptr_ptr,
589
- nrows, nthreads, standardize,
590
- model_ptr, ext_model_ptr,
591
- depths_ptr, tree_num_ptr);
1216
+ predict_iforest<double, int>(numeric_data_ptr, categ_data_ptr,
1217
+ true, (size_t)0, (size_t)0,
1218
+ Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
1219
+ Xr_ptr, Xr_ind_ptr, Xr_indptr_ptr,
1220
+ nrows, nthreads, standardize,
1221
+ model_ptr, ext_model_ptr,
1222
+ depths_ptr, tree_num_ptr,
1223
+ tree_depths_ptr,
1224
+ indexer);
592
1225
  }
593
1226
 
594
- // [[Rcpp::export]]
595
- void dist_iso(SEXP model_R_ptr, Rcpp::NumericVector tmat, Rcpp::NumericVector dmat,
596
- Rcpp::NumericVector rmat, bool is_extended,
1227
+ // [[Rcpp::export(rng = false)]]
1228
+ void dist_iso(SEXP model_R_ptr, SEXP indexer_R_ptr,
1229
+ Rcpp::NumericVector tmat, Rcpp::NumericMatrix dmat,
1230
+ Rcpp::NumericMatrix rmat, bool is_extended,
597
1231
  Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
598
1232
  Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
599
- size_t nrows, int nthreads, bool assume_full_distr,
600
- bool standardize_dist, bool sq_dist, size_t n_from)
1233
+ size_t nrows, bool use_long_double, int nthreads, bool assume_full_distr,
1234
+ bool standardize_dist, bool sq_dist, size_t n_from,
1235
+ bool use_reference_points, bool as_kernel)
601
1236
  {
602
1237
  double* numeric_data_ptr = NULL;
603
1238
  int* categ_data_ptr = NULL;
604
1239
  double* Xc_ptr = NULL;
605
- sparse_ix* Xc_ind_ptr = NULL;
606
- sparse_ix* Xc_indptr_ptr = NULL;
607
- std::vector<double> Xcpp;
1240
+ int* Xc_ind_ptr = NULL;
1241
+ int* Xc_indptr_ptr = NULL;
1242
+ Rcpp::NumericVector Xcpp;
608
1243
 
609
1244
  if (X_num.size())
610
1245
  {
611
- numeric_data_ptr = &X_num[0];
1246
+ numeric_data_ptr = REAL(X_num);
612
1247
  }
613
1248
 
614
1249
  if (X_cat.size())
615
1250
  {
616
- categ_data_ptr = &X_cat[0];
1251
+ categ_data_ptr = INTEGER(X_cat);
617
1252
  }
618
1253
 
619
1254
  if (Xc_indptr.size())
620
1255
  {
621
- if (Xc.size())
622
- Xc_ptr = &Xc[0];
623
- if (Xc_ind.size())
624
- Xc_ind_ptr = &Xc_ind[0];
625
- Xc_indptr_ptr = &Xc_indptr[0];
1256
+ Xc_ptr = REAL(Xc);
1257
+ Xc_ind_ptr = INTEGER(Xc_ind);
1258
+ Xc_indptr_ptr = INTEGER(Xc_indptr);
626
1259
  }
627
1260
 
628
- double* tmat_ptr = n_from? (double*)NULL : &tmat[0];
629
- double* dmat_ptr = (sq_dist & !n_from)? &dmat[0] : NULL;
630
- double* rmat_ptr = n_from? &rmat[0] : NULL;
1261
+ double* tmat_ptr = n_from? (double*)NULL : REAL(tmat);
1262
+ double* dmat_ptr = (sq_dist & !n_from)? REAL(dmat) : NULL;
1263
+ double* rmat_ptr = n_from? REAL(rmat) : NULL;
631
1264
 
632
1265
  IsoForest* model_ptr = NULL;
633
1266
  ExtIsoForest* ext_model_ptr = NULL;
1267
+ TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr);
634
1268
  if (is_extended)
635
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1269
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
636
1270
  else
637
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1271
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1272
+
1273
+ if (use_reference_points && indexer && !indexer->indices.front().reference_points.empty()) {
1274
+ tmat_ptr = NULL;
1275
+ dmat_ptr = NULL;
1276
+ rmat_ptr = REAL(rmat);
1277
+ }
1278
+ else {
1279
+ use_reference_points = false;
1280
+ }
638
1281
 
639
1282
 
640
1283
  MissingAction missing_action = is_extended?
@@ -650,43 +1293,58 @@ void dist_iso(SEXP model_R_ptr, Rcpp::NumericVector tmat, Rcpp::NumericVector dm
650
1293
 
651
1294
  calc_similarity(numeric_data_ptr, categ_data_ptr,
652
1295
  Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
653
- nrows, nthreads, assume_full_distr, standardize_dist,
1296
+ nrows, use_long_double, nthreads,
1297
+ assume_full_distr, standardize_dist, as_kernel,
654
1298
  model_ptr, ext_model_ptr,
655
- tmat_ptr, rmat_ptr, n_from);
1299
+ tmat_ptr, rmat_ptr, n_from, use_reference_points,
1300
+ indexer, true, (size_t)0, (size_t)0);
656
1301
 
657
- if (sq_dist & !n_from)
658
- tmat_to_dense(tmat_ptr, dmat_ptr, nrows, !standardize_dist);
1302
+ if (tmat.size() && dmat.ncol() > 0)
1303
+ {
1304
+ double diag_filler;
1305
+ if (as_kernel) {
1306
+ if (standardize_dist)
1307
+ diag_filler = 1.;
1308
+ else
1309
+ diag_filler = (model_ptr != NULL)? model_ptr->trees.size() : ext_model_ptr->hplanes.size();
1310
+ }
1311
+ else {
1312
+ if (standardize_dist)
1313
+ diag_filler = 0;
1314
+ else
1315
+ diag_filler = std::numeric_limits<double>::infinity();
1316
+ }
1317
+ tmat_to_dense(tmat_ptr, dmat_ptr, nrows, diag_filler);
1318
+ }
659
1319
  }
660
1320
 
661
- // [[Rcpp::export]]
1321
+ // [[Rcpp::export(rng = false)]]
662
1322
  Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
663
1323
  Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
664
1324
  Rcpp::NumericVector Xr, Rcpp::IntegerVector Xr_ind, Rcpp::IntegerVector Xr_indptr,
665
- size_t nrows, int nthreads)
1325
+ size_t nrows, bool use_long_double, int nthreads)
666
1326
  {
667
1327
  double* numeric_data_ptr = NULL;
668
1328
  int* categ_data_ptr = NULL;
669
1329
  double* Xr_ptr = NULL;
670
- sparse_ix* Xr_ind_ptr = NULL;
671
- sparse_ix* Xr_indptr_ptr = NULL;
1330
+ int* Xr_ind_ptr = NULL;
1331
+ int* Xr_indptr_ptr = NULL;
672
1332
 
673
1333
  if (X_num.size())
674
1334
  {
675
- numeric_data_ptr = &X_num[0];
1335
+ numeric_data_ptr = REAL(X_num);
676
1336
  }
677
1337
 
678
1338
  if (X_cat.size())
679
1339
  {
680
- categ_data_ptr = &X_cat[0];
1340
+ categ_data_ptr = INTEGER(X_cat);
681
1341
  }
682
1342
 
683
1343
  if (Xr_indptr.size())
684
1344
  {
685
- if (Xr.size())
686
- Xr_ptr = &Xr[0];
687
- if (Xr_ind.size())
688
- Xr_ind_ptr = &Xr_ind[0];
689
- Xr_indptr_ptr = &Xr_indptr[0];
1345
+ Xr_ptr = REAL(Xr);
1346
+ Xr_ind_ptr = INTEGER(Xr_ind);
1347
+ Xr_indptr_ptr = INTEGER(Xr_indptr);
690
1348
  }
691
1349
 
692
1350
  if (X_num.size()) numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, X_num.size(), nthreads);
@@ -695,16 +1353,18 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
695
1353
  IsoForest* model_ptr = NULL;
696
1354
  ExtIsoForest* ext_model_ptr = NULL;
697
1355
  if (is_extended)
698
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1356
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
699
1357
  else
700
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1358
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
1359
+
1360
+ Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imputer_R_ptr);
701
1361
 
702
- Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1362
+ if (!imputer_ptr) throw Rcpp::exception("Error: requested missing value imputation, but model was built without imputer.\n");
703
1363
 
704
1364
 
705
- impute_missing_values(numeric_data_ptr, categ_data_ptr,
1365
+ impute_missing_values(numeric_data_ptr, categ_data_ptr, true,
706
1366
  Xr_ptr, Xr_ind_ptr, Xr_indptr_ptr,
707
- nrows, nthreads,
1367
+ nrows, use_long_double, nthreads,
708
1368
  model_ptr, ext_model_ptr,
709
1369
  *imputer_ptr);
710
1370
 
@@ -714,7 +1374,273 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
714
1374
  );
715
1375
  }
716
1376
 
717
- // [[Rcpp::export]]
1377
+ // [[Rcpp::export(rng = false)]]
1378
+ void drop_imputer(bool is_altrepped, bool free_cpp,
1379
+ SEXP lst_imputer, Rcpp::List lst_cpp_objects, Rcpp::List lst_params)
1380
+ {
1381
+ SEXP FalseObj = PROTECT(Rf_ScalarLogical(0));
1382
+ SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
1383
+ SEXP altrepped_null = PROTECT(get_altrepped_null_pointer());
1384
+
1385
+ if (is_altrepped) {
1386
+
1387
+ if (free_cpp) {
1388
+ SEXP imp_R_ptr = R_altrep_data1(lst_imputer);
1389
+ Imputer* imputer_ptr = (Imputer*)R_ExternalPtrAddr(imp_R_ptr);
1390
+ delete imputer_ptr;
1391
+ R_SetExternalPtrAddr(imp_R_ptr, nullptr);
1392
+ R_ClearExternalPtr(imp_R_ptr);
1393
+ }
1394
+
1395
+ lst_cpp_objects["imputer"] = altrepped_null;
1396
+
1397
+ }
1398
+
1399
+ else {
1400
+
1401
+ if (free_cpp) {
1402
+ SEXP imp_R_ptr = VECTOR_ELT(lst_imputer, 0);
1403
+ Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imp_R_ptr);
1404
+ delete imputer_ptr;
1405
+ R_SetExternalPtrAddr(imp_R_ptr, nullptr);
1406
+ R_ClearExternalPtr(imp_R_ptr);
1407
+ SET_VECTOR_ELT(lst_imputer, 0, imp_R_ptr);
1408
+ }
1409
+
1410
+ SET_VECTOR_ELT(lst_imputer, 0, blank_ptr);
1411
+ SET_VECTOR_ELT(lst_imputer, 1, R_NilValue);
1412
+ }
1413
+
1414
+ lst_params["build_imputer"] = FalseObj;
1415
+ UNPROTECT(3);
1416
+ }
1417
+
1418
+ // [[Rcpp::export(rng = false)]]
1419
+ void drop_indexer(bool is_altrepped, bool free_cpp,
1420
+ SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata)
1421
+ {
1422
+ SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0));
1423
+ SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
1424
+ SEXP altrepped_null = PROTECT(get_altrepped_null_pointer());
1425
+
1426
+ if (is_altrepped) {
1427
+
1428
+ if (free_cpp) {
1429
+ SEXP ind_R_ptr = R_altrep_data1(lst_indexer);
1430
+ TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr);
1431
+ delete indexer_ptr;
1432
+ R_SetExternalPtrAddr(ind_R_ptr, nullptr);
1433
+ R_ClearExternalPtr(ind_R_ptr);
1434
+ }
1435
+
1436
+ lst_cpp_objects["indexer"] = altrepped_null;
1437
+ }
1438
+
1439
+ else {
1440
+
1441
+ if (free_cpp) {
1442
+ SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0);
1443
+ TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr);
1444
+ delete indexer_ptr;
1445
+ R_SetExternalPtrAddr(ind_R_ptr, nullptr);
1446
+ R_ClearExternalPtr(ind_R_ptr);
1447
+ SET_VECTOR_ELT(lst_indexer, 0, ind_R_ptr);
1448
+ }
1449
+
1450
+ SET_VECTOR_ELT(lst_indexer, 0, blank_ptr);
1451
+ SET_VECTOR_ELT(lst_indexer, 1, R_NilValue);
1452
+ }
1453
+
1454
+ lst_metadata["reference_names"] = empty_str;
1455
+ UNPROTECT(3);
1456
+ }
1457
+
1458
+ // [[Rcpp::export(rng = false)]]
1459
+ void drop_reference_points(bool is_altrepped, SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata)
1460
+ {
1461
+ SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0));
1462
+
1463
+ if (is_altrepped)
1464
+ {
1465
+ SEXP ind_R_ptr = R_altrep_data1(lst_indexer);
1466
+ TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr);
1467
+ if (!indexer_ptr) return;
1468
+
1469
+ for (auto &tree : indexer_ptr->indices)
1470
+ {
1471
+ tree.reference_points.clear();
1472
+ tree.reference_indptr.clear();
1473
+ tree.reference_mapping.clear();
1474
+ }
1475
+ }
1476
+
1477
+ else
1478
+ {
1479
+ SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0);
1480
+ TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr);
1481
+ if (!indexer_ptr) return;
1482
+
1483
+ std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr));
1484
+ for (auto &tree : new_indexer->indices)
1485
+ {
1486
+ tree.reference_points.clear();
1487
+ tree.reference_indptr.clear();
1488
+ tree.reference_mapping.clear();
1489
+ }
1490
+
1491
+ SET_VECTOR_ELT(lst_indexer, 1, serialize_cpp_obj(new_indexer.get()));
1492
+ *indexer_ptr = std::move(*new_indexer);
1493
+ new_indexer.release();
1494
+ }
1495
+
1496
+ lst_metadata["reference_names"] = empty_str;
1497
+ UNPROTECT(1);
1498
+ }
1499
+
1500
+ // [[Rcpp::export(rng = false)]]
1501
+ Rcpp::List subset_trees
1502
+ (
1503
+ SEXP model_R_ptr, SEXP imputer_R_ptr, SEXP indexer_R_ptr,
1504
+ bool is_extended, bool is_altrepped,
1505
+ Rcpp::IntegerVector trees_take
1506
+ )
1507
+ {
1508
+ Rcpp::List out = Rcpp::List::create(
1509
+ Rcpp::_["model"] = R_NilValue,
1510
+ Rcpp::_["imputer"] = R_NilValue,
1511
+ Rcpp::_["indexer"] = R_NilValue
1512
+ );
1513
+ Rcpp::List lst_model = Rcpp::List::create(
1514
+ Rcpp::_["ptr"] = R_NilValue,
1515
+ Rcpp::_["ser"] = R_NilValue
1516
+ );
1517
+ Rcpp::List lst_imputer = Rcpp::List::create(
1518
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
1519
+ Rcpp::_["ser"] = R_NilValue
1520
+ );
1521
+ Rcpp::List lst_indexer = Rcpp::List::create(
1522
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
1523
+ Rcpp::_["ser"] = R_NilValue
1524
+ );
1525
+
1526
+
1527
+ IsoForest* model_ptr = NULL;
1528
+ ExtIsoForest* ext_model_ptr = NULL;
1529
+ Imputer* imputer_ptr = NULL;
1530
+ TreesIndexer* indexer_ptr = NULL;
1531
+ std::unique_ptr<IsoForest> new_model_ptr(nullptr);
1532
+ std::unique_ptr<ExtIsoForest> new_ext_model_ptr(nullptr);
1533
+ std::unique_ptr<Imputer> new_imputer_ptr(nullptr);
1534
+ std::unique_ptr<TreesIndexer> new_indexer_ptr(nullptr);
1535
+
1536
+ if (is_extended) {
1537
+ ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1538
+ new_ext_model_ptr = std::unique_ptr<ExtIsoForest>(new ExtIsoForest());
1539
+ }
1540
+ else {
1541
+ model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1542
+ new_model_ptr = std::unique_ptr<IsoForest>(new IsoForest());
1543
+ }
1544
+
1545
+ imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
1546
+ if (imputer_ptr) {
1547
+ new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
1548
+ }
1549
+
1550
+ indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
1551
+ if (indexer_ptr) {
1552
+ new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer());
1553
+ }
1554
+
1555
+ std::unique_ptr<size_t[]> trees_take_(new size_t[trees_take.size()]);
1556
+ for (decltype(trees_take.size()) ix = 0; ix < trees_take.size(); ix++)
1557
+ trees_take_[ix] = (size_t)(trees_take[ix] - 1);
1558
+
1559
+ subset_model(model_ptr, new_model_ptr.get(),
1560
+ ext_model_ptr, new_ext_model_ptr.get(),
1561
+ imputer_ptr, new_imputer_ptr.get(),
1562
+ indexer_ptr, new_indexer_ptr.get(),
1563
+ trees_take_.get(), trees_take.size());
1564
+ trees_take_.reset();
1565
+
1566
+ if (is_altrepped)
1567
+ {
1568
+ out["model"] = is_extended?
1569
+ Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&new_ext_model_ptr)
1570
+ :
1571
+ Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&new_model_ptr);
1572
+ out["imputer"] = imputer_ptr?
1573
+ Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&new_imputer_ptr)
1574
+ :
1575
+ Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1576
+ out["indexer"] = indexer_ptr?
1577
+ Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&new_indexer_ptr)
1578
+ :
1579
+ Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
1580
+ }
1581
+
1582
+ else
1583
+ {
1584
+ lst_model["ser"] = is_extended? serialize_cpp_obj(new_ext_model_ptr.get()) : serialize_cpp_obj(new_model_ptr.get());
1585
+ if (imputer_ptr) lst_imputer["ser"] = serialize_cpp_obj(new_imputer_ptr.get());
1586
+ if (indexer_ptr) lst_indexer["ser"] = serialize_cpp_obj(new_indexer_ptr.get());
1587
+
1588
+ lst_model["ptr"] = is_extended?
1589
+ Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get())
1590
+ :
1591
+ Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get());
1592
+ new_model_ptr.release();
1593
+
1594
+ if (imputer_ptr) {
1595
+ lst_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get());
1596
+ new_imputer_ptr.release();
1597
+ }
1598
+
1599
+ if (indexer_ptr) {
1600
+ lst_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get());
1601
+ new_indexer_ptr.release();
1602
+ }
1603
+
1604
+ out["model"] = lst_model;
1605
+ out["imputer"] = lst_imputer;
1606
+ out["indexer"] = lst_indexer;
1607
+ }
1608
+
1609
+ return out;
1610
+ }
1611
+
1612
+ // [[Rcpp::export(rng = false)]]
1613
+ void inplace_set_to_zero(SEXP obj)
1614
+ {
1615
+ auto obj_type = TYPEOF(obj);
1616
+ switch(obj_type)
1617
+ {
1618
+ case REALSXP:
1619
+ {
1620
+ REAL(obj)[0] = 0;
1621
+ break;
1622
+ }
1623
+
1624
+ case INTSXP:
1625
+ {
1626
+ INTEGER(obj)[0] = 0;
1627
+ break;
1628
+ }
1629
+
1630
+ case LGLSXP:
1631
+ {
1632
+ LOGICAL(obj)[0] = 0;
1633
+ break;
1634
+ }
1635
+
1636
+ default:
1637
+ {
1638
+ Rcpp::stop("Model object has incorrect structure.\n");
1639
+ }
1640
+ }
1641
+ }
1642
+
1643
+ // [[Rcpp::export(rng = false)]]
718
1644
  Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
719
1645
  {
720
1646
  size_t ntrees;
@@ -722,21 +1648,21 @@ Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
722
1648
  ExtIsoForest* ext_model_ptr = NULL;
723
1649
  if (is_extended)
724
1650
  {
725
- ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1651
+ ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr);
726
1652
  ntrees = ext_model_ptr->hplanes.size();
727
1653
  }
728
1654
  else
729
1655
  {
730
- model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1656
+ model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr);
731
1657
  ntrees = model_ptr->trees.size();
732
1658
  }
733
1659
 
734
1660
  Rcpp::IntegerVector n_nodes(ntrees);
735
1661
  Rcpp::IntegerVector n_terminal(ntrees);
736
1662
  if (is_extended)
737
- get_num_nodes(*ext_model_ptr, &n_nodes[0], &n_terminal[0], nthreads);
1663
+ get_num_nodes(*ext_model_ptr, INTEGER(n_nodes), INTEGER(n_terminal), nthreads);
738
1664
  else
739
- get_num_nodes(*model_ptr, &n_nodes[0], &n_terminal[0], nthreads);
1665
+ get_num_nodes(*model_ptr, INTEGER(n_nodes), INTEGER(n_terminal), nthreads);
740
1666
 
741
1667
  return Rcpp::List::create(
742
1668
  Rcpp::_["total"] = n_nodes,
@@ -744,52 +1670,224 @@ Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
744
1670
  );
745
1671
  }
746
1672
 
747
- // [[Rcpp::export]]
748
- Rcpp::List append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
749
- SEXP imp_R_ptr, SEXP oimp_R_ptr,
750
- bool is_extended)
1673
+ // [[Rcpp::export(rng = false)]]
1674
+ void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
1675
+ SEXP imp_R_ptr, SEXP oimp_R_ptr,
1676
+ SEXP ind_R_ptr, SEXP oind_R_ptr,
1677
+ bool is_extended,
1678
+ Rcpp::RawVector serialized_obj,
1679
+ Rcpp::RawVector serialized_imputer,
1680
+ Rcpp::RawVector serialized_indexer,
1681
+ Rcpp::List &model_cpp_obj_update,
1682
+ Rcpp::List &model_params_update,
1683
+ bool is_altrepped)
751
1684
  {
752
- Rcpp::List out;
753
- IsoForest* model_ptr = NULL;
754
- IsoForest* other_ptr = NULL;
755
- ExtIsoForest* ext_model_ptr = NULL;
756
- ExtIsoForest* ext_other_ptr = NULL;
757
- Imputer* imputer_ptr = NULL;
758
- Imputer* oimputer_ptr = NULL;
1685
+ Rcpp::List out = Rcpp::List::create(
1686
+ Rcpp::_["model_ser"] = R_NilValue,
1687
+ Rcpp::_["imputer_ser"] = R_NilValue,
1688
+ Rcpp::_["indexer_ser"] = R_NilValue
1689
+ );
1690
+
1691
+ Rcpp::IntegerVector ntrees_new = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]));
1692
+
1693
+ IsoForest* model_ptr = nullptr;
1694
+ IsoForest* other_ptr = nullptr;
1695
+ ExtIsoForest* ext_model_ptr = nullptr;
1696
+ ExtIsoForest* ext_other_ptr = nullptr;
1697
+ Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1698
+ Imputer* oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr));
1699
+ TreesIndexer* indexer_ptr = get_indexer_ptr_from_R_obj(ind_R_ptr);
1700
+ TreesIndexer* oindexer_ptr = get_indexer_ptr_from_R_obj(oind_R_ptr);
1701
+ size_t old_ntrees;
759
1702
 
760
1703
  if (is_extended) {
761
1704
  ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
762
1705
  ext_other_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(other_R_ptr));
1706
+ old_ntrees = ext_model_ptr->hplanes.size();
1707
+ if (ext_model_ptr == ext_other_ptr) {
1708
+ throw Rcpp::exception("Error: attempting to append trees from one model to itself.");
1709
+ }
763
1710
  } else {
764
1711
  model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
765
1712
  other_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(other_R_ptr));
1713
+ old_ntrees = model_ptr->trees.size();
1714
+ if (model_ptr == other_ptr) {
1715
+ throw Rcpp::exception("Error: attempting to append trees from one model to itself.");
1716
+ }
766
1717
  }
767
1718
 
768
- if (!Rf_isNull(imp_R_ptr) && !Rf_isNull(oimp_R_ptr) &&
769
- R_ExternalPtrAddr(imp_R_ptr) != NULL &&
770
- R_ExternalPtrAddr(oimp_R_ptr) != NULL)
771
- {
772
- imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
773
- oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr));
1719
+ if (imputer_ptr && !oimputer_ptr) {
1720
+ throw Rcpp::exception("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n");
1721
+ }
1722
+
1723
+ if (indexer_ptr && !oindexer_ptr) {
1724
+ throw Rcpp::exception("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n");
774
1725
  }
775
1726
 
1727
+
776
1728
  merge_models(model_ptr, other_ptr,
777
1729
  ext_model_ptr, ext_other_ptr,
778
- imputer_ptr, oimputer_ptr);
1730
+ imputer_ptr, oimputer_ptr,
1731
+ indexer_ptr, oindexer_ptr);
779
1732
 
1733
+ Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
1734
+ size_t new_size;
780
1735
 
781
- if (is_extended)
782
- out["serialized"] = serialize_cpp_obj(ext_model_ptr);
783
- else
784
- out["serialized"] = serialize_cpp_obj(model_ptr);
1736
+ if (is_altrepped) goto dont_serialize;
785
1737
 
786
- if (imputer_ptr != NULL && oimputer_ptr != NULL)
787
- out["imp_ser"] = serialize_cpp_obj(imputer_ptr);
1738
+ try
1739
+ {
1740
+ if (!is_extended)
1741
+ {
1742
+ if (serialized_obj.size() &&
1743
+ check_can_undergo_incremental_serialization(*model_ptr, (char*)RAW(serialized_obj)))
1744
+ {
1745
+ try {
1746
+ new_size = serialized_obj.size()
1747
+ + determine_serialized_size_additional_trees(*model_ptr, old_ntrees);
1748
+ new_serialized = resize_vec(serialized_obj, new_size);
1749
+ char *temp = (char*)RAW(new_serialized);
1750
+ incremental_serialize_isotree(*model_ptr, temp);
1751
+ out["model_ser"] = new_serialized;
1752
+ }
1753
+
1754
+ catch (std::runtime_error &e) {
1755
+ goto serialize_anew_singlevar;
1756
+ }
1757
+ }
1758
+
1759
+ else {
1760
+ serialize_anew_singlevar:
1761
+ out["model_ser"] = serialize_cpp_obj(model_ptr);
1762
+ }
1763
+ }
788
1764
 
789
- return out;
1765
+ else
1766
+ {
1767
+ if (serialized_obj.size() &&
1768
+ check_can_undergo_incremental_serialization(*ext_model_ptr, (char*)RAW(serialized_obj)))
1769
+ {
1770
+ try {
1771
+ new_size = serialized_obj.size()
1772
+ + determine_serialized_size_additional_trees(*ext_model_ptr, old_ntrees);
1773
+ new_serialized = resize_vec(serialized_obj, new_size);
1774
+ char *temp = (char*)RAW(new_serialized);
1775
+ incremental_serialize_isotree(*ext_model_ptr, temp);
1776
+ out["model_ser"] = new_serialized;
1777
+ }
1778
+
1779
+ catch (std::runtime_error &e) {
1780
+ goto serialize_anew_ext;
1781
+ }
1782
+ }
1783
+
1784
+ else {
1785
+ serialize_anew_ext:
1786
+ out["model_ser"] = serialize_cpp_obj(ext_model_ptr);
1787
+ }
1788
+ }
1789
+
1790
+ if (imputer_ptr)
1791
+ {
1792
+ if (serialized_imputer.size() &&
1793
+ check_can_undergo_incremental_serialization(*imputer_ptr, (char*)RAW(serialized_imputer)))
1794
+ {
1795
+ try {
1796
+ new_size = serialized_obj.size()
1797
+ + determine_serialized_size_additional_trees(*imputer_ptr, old_ntrees);
1798
+ new_imp_serialized = resize_vec(serialized_imputer, new_size);
1799
+ char *temp = (char*)RAW(new_imp_serialized);
1800
+ incremental_serialize_isotree(*imputer_ptr, temp);
1801
+ out["imputer_ser"] = new_imp_serialized;
1802
+ }
1803
+
1804
+ catch (std::runtime_error &e) {
1805
+ goto serialize_anew_imp;
1806
+ }
1807
+ }
1808
+
1809
+ else {
1810
+ serialize_anew_imp:
1811
+ out["imputer_ser"] = serialize_cpp_obj(imputer_ptr);
1812
+ }
1813
+ }
1814
+
1815
+ if (indexer_ptr)
1816
+ {
1817
+ if (serialized_indexer.size() &&
1818
+ check_can_undergo_incremental_serialization(*indexer_ptr, (char*)RAW(serialized_indexer)))
1819
+ {
1820
+ try {
1821
+ new_size = serialized_obj.size()
1822
+ + determine_serialized_size_additional_trees(*indexer_ptr, old_ntrees);
1823
+ new_ind_serialized = resize_vec(serialized_indexer, new_size);
1824
+ char *temp = (char*)RAW(new_ind_serialized);
1825
+ incremental_serialize_isotree(*indexer_ptr, temp);
1826
+ out["indexer_ser"] = new_ind_serialized;
1827
+ }
1828
+
1829
+ catch (std::runtime_error &e) {
1830
+ goto serialize_anew_ind;
1831
+ }
1832
+ }
1833
+
1834
+ else {
1835
+ serialize_anew_ind:
1836
+ out["indexer_ser"] = serialize_cpp_obj(indexer_ptr);
1837
+ }
1838
+ }
1839
+ }
1840
+
1841
+ catch (...)
1842
+ {
1843
+ if (!is_extended)
1844
+ model_ptr->trees.resize(old_ntrees);
1845
+ else
1846
+ ext_model_ptr->hplanes.resize(old_ntrees);
1847
+
1848
+ if (imputer_ptr)
1849
+ imputer_ptr->imputer_tree.resize(old_ntrees);
1850
+ if (indexer_ptr)
1851
+ indexer_ptr->indices.resize(old_ntrees);
1852
+ throw;
1853
+ }
1854
+
1855
+ {
1856
+ Rcpp::List model_lst = model_cpp_obj_update["model"];
1857
+ model_lst["ser"] = out["model_ser"];
1858
+ model_cpp_obj_update["model"] = model_lst;
1859
+
1860
+ if (imputer_ptr)
1861
+ {
1862
+ Rcpp::List imputer_lst = model_cpp_obj_update["imputer"];
1863
+ imputer_lst["ser"] = out["imputer_ser"];
1864
+ model_cpp_obj_update["imputer"] = imputer_lst;
1865
+ }
1866
+
1867
+ if (indexer_ptr)
1868
+ {
1869
+ Rcpp::List indexer_lst = model_cpp_obj_update["indexer"];
1870
+ indexer_lst["ser"] = out["indexer_ser"];
1871
+ model_cpp_obj_update["indexer"] = indexer_lst;
1872
+ }
1873
+ }
1874
+
1875
+ dont_serialize:
1876
+ *(INTEGER(ntrees_new)) = is_extended? ext_model_ptr->hplanes.size() : model_ptr->trees.size();
1877
+ model_params_update["ntrees"] = ntrees_new;
790
1878
  }
791
1879
 
792
- // [[Rcpp::export]]
1880
+ SEXP alloc_List(void *data)
1881
+ {
1882
+ return Rcpp::List(*(size_t*)data);
1883
+ }
1884
+
1885
+ SEXP safe_CastString(void *data)
1886
+ {
1887
+ return Rcpp::CharacterVector(*(std::string*)data);
1888
+ }
1889
+
1890
+ // [[Rcpp::export(rng = false)]]
793
1891
  Rcpp::ListOf<Rcpp::CharacterVector> model_to_sql(SEXP model_R_ptr, bool is_extended,
794
1892
  Rcpp::CharacterVector numeric_colanmes,
795
1893
  Rcpp::CharacterVector categ_colnames,
@@ -814,13 +1912,16 @@ Rcpp::ListOf<Rcpp::CharacterVector> model_to_sql(SEXP model_R_ptr, bool is_exten
814
1912
  categ_levels_cpp,
815
1913
  output_tree_num, true, single_tree, tree_num,
816
1914
  nthreads);
817
- Rcpp::List out(res.size());
1915
+ /* TODO: this function could create objects through the ALTREP system instead.
1916
+ That way, it would avoid an extra copy of the data */
1917
+ size_t sz = res.size();
1918
+ Rcpp::List out = Rcpp::unwindProtect(alloc_List, (void*)&sz);
818
1919
  for (size_t ix = 0; ix < res.size(); ix++)
819
- out[ix] = Rcpp::CharacterVector(res[ix]);
1920
+ out[ix] = Rcpp::unwindProtect(safe_CastString, &(res[ix]));
820
1921
  return out;
821
1922
  }
822
1923
 
823
- // [[Rcpp::export]]
1924
+ // [[Rcpp::export(rng = false)]]
824
1925
  Rcpp::CharacterVector model_to_sql_with_select_from(SEXP model_R_ptr, bool is_extended,
825
1926
  Rcpp::CharacterVector numeric_colanmes,
826
1927
  Rcpp::CharacterVector categ_colnames,
@@ -842,11 +1943,1058 @@ Rcpp::CharacterVector model_to_sql_with_select_from(SEXP model_R_ptr, bool is_ex
842
1943
  std::string table_from_cpp = Rcpp::as<std::string>(table_from);
843
1944
  std::string select_as_cpp = Rcpp::as<std::string>(select_as);
844
1945
 
845
- return generate_sql_with_select_from(model_ptr, ext_model_ptr,
846
- table_from_cpp, select_as_cpp,
847
- numeric_colanmes_cpp, categ_colanmes_cpp,
848
- categ_levels_cpp,
849
- true, nthreads);
1946
+ std::string out = generate_sql_with_select_from(model_ptr, ext_model_ptr,
1947
+ table_from_cpp, select_as_cpp,
1948
+ numeric_colanmes_cpp, categ_colanmes_cpp,
1949
+ categ_levels_cpp,
1950
+ true, nthreads);
1951
+ /* TODO: this function could create objects through the ALTREP system instead.
1952
+ That way, it would avoid an extra copy of the data */
1953
+ return Rcpp::unwindProtect(safe_CastString, &out);
1954
+ }
1955
+
1956
+ // [[Rcpp::export(rng = false)]]
1957
+ Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, SEXP ind_R_ptr, bool lazy_serialization)
1958
+ {
1959
+ Rcpp::List out = Rcpp::List::create(
1960
+ Rcpp::_["model"] = Rcpp::XPtr<void*>(nullptr, false),
1961
+ Rcpp::_["imputer"] = Rcpp::XPtr<void*>(nullptr, false),
1962
+ Rcpp::_["indexer"] = Rcpp::XPtr<void*>(nullptr, false)
1963
+ );
1964
+
1965
+ IsoForest* model_ptr = NULL;
1966
+ ExtIsoForest* ext_model_ptr = NULL;
1967
+ Imputer* imputer_ptr = NULL;
1968
+ TreesIndexer* indexer_ptr = NULL;
1969
+ if (is_extended)
1970
+ ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1971
+ else
1972
+ model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
1973
+ if (R_ExternalPtrAddr(imp_R_ptr))
1974
+ imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
1975
+ if (R_ExternalPtrAddr(ind_R_ptr))
1976
+ indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
1977
+
1978
+ std::unique_ptr<IsoForest> copy_model(new IsoForest());
1979
+ std::unique_ptr<ExtIsoForest> copy_ext_model(new ExtIsoForest());
1980
+ std::unique_ptr<Imputer> copy_imputer(new Imputer());
1981
+ std::unique_ptr<TreesIndexer> copy_indexer(new TreesIndexer());
1982
+
1983
+ if (model_ptr)
1984
+ *copy_model = *model_ptr;
1985
+ if (ext_model_ptr)
1986
+ *copy_ext_model = *ext_model_ptr;
1987
+ if (imputer_ptr)
1988
+ *copy_imputer = *imputer_ptr;
1989
+ if (indexer_ptr)
1990
+ *copy_indexer = *indexer_ptr;
1991
+
1992
+ if (lazy_serialization)
1993
+ {
1994
+ if (is_extended) {
1995
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&copy_ext_model);
1996
+ }
1997
+ else {
1998
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&copy_model);
1999
+ }
2000
+
2001
+ if (imputer_ptr) {
2002
+ out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&copy_imputer);
2003
+ }
2004
+ else {
2005
+ out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2006
+ }
2007
+
2008
+ if (indexer_ptr) {
2009
+ out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&copy_indexer);
2010
+ }
2011
+ else {
2012
+ out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2013
+ }
2014
+ }
2015
+
2016
+ else
2017
+ {
2018
+ if (is_extended) {
2019
+ out["model"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get());
2020
+ copy_ext_model.release();
2021
+ }
2022
+ else {
2023
+ out["model"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get());
2024
+ copy_model.release();
2025
+ }
2026
+ if (imputer_ptr) {
2027
+ out["imputer"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get());
2028
+ copy_imputer.release();
2029
+ }
2030
+ if (indexer_ptr) {
2031
+ out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get());
2032
+ copy_indexer.release();
2033
+ }
2034
+ }
2035
+
2036
+ return out;
2037
+ }
2038
+
2039
+ // [[Rcpp::export(rng = false)]]
2040
+ void build_tree_indices(Rcpp::List lst_cpp_objects, SEXP ptr_model, bool is_altrepped, bool is_extended, bool with_distances, int nthreads)
2041
+ {
2042
+ Rcpp::List lst_out = Rcpp::List::create(
2043
+ Rcpp::_["ptr"] = R_NilValue,
2044
+ Rcpp::_["ser"] = R_NilValue
2045
+ );
2046
+ std::unique_ptr<TreesIndexer> indexer(new TreesIndexer());
2047
+
2048
+ if (!is_extended) {
2049
+ build_tree_indices(*indexer,
2050
+ *static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model)),
2051
+ nthreads,
2052
+ with_distances);
2053
+ }
2054
+ else {
2055
+ build_tree_indices(*indexer,
2056
+ *static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model)),
2057
+ nthreads,
2058
+ with_distances);
2059
+ }
2060
+
2061
+ if (is_altrepped) {
2062
+ lst_cpp_objects["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&indexer);
2063
+ }
2064
+
2065
+ else {
2066
+ lst_out["ser"] = serialize_cpp_obj(indexer.get());
2067
+ lst_out["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
2068
+ indexer.release();
2069
+ lst_cpp_objects["indexer"] = lst_out;
2070
+ }
2071
+ }
2072
+
2073
+ // [[Rcpp::export(rng = false)]]
2074
+ bool check_node_indexer_has_distances(SEXP indexer_R_ptr)
2075
+ {
2076
+ const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr);
2077
+ if (!indexer) return false;
2078
+ return !indexer->indices.front().node_distances.empty();
2079
+ }
2080
+
2081
+ // [[Rcpp::export(rng = false)]]
2082
+ void set_reference_points(Rcpp::List lst_cpp_objects, SEXP ptr_model, SEXP ind_R_ptr, bool is_altrepped,
2083
+ Rcpp::List lst_metadata, SEXP rnames, bool is_extended,
2084
+ Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
2085
+ Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
2086
+ size_t nrows, int nthreads, bool with_distances)
2087
+ {
2088
+ Rcpp::List lst_out = Rcpp::List::create(
2089
+ Rcpp::_["ptr"] = R_NilValue,
2090
+ Rcpp::_["ser"] = R_NilValue
2091
+ );
2092
+
2093
+ double* numeric_data_ptr = NULL;
2094
+ int* categ_data_ptr = NULL;
2095
+ double* Xc_ptr = NULL;
2096
+ int* Xc_ind_ptr = NULL;
2097
+ int* Xc_indptr_ptr = NULL;
2098
+ Rcpp::NumericVector Xcpp;
2099
+
2100
+ if (X_num.size())
2101
+ {
2102
+ numeric_data_ptr = REAL(X_num);
2103
+ }
2104
+
2105
+ if (X_cat.size())
2106
+ {
2107
+ categ_data_ptr = INTEGER(X_cat);
2108
+ }
2109
+
2110
+ if (Xc_indptr.size())
2111
+ {
2112
+ Xc_ptr = REAL(Xc);
2113
+ Xc_ind_ptr = INTEGER(Xc_ind);
2114
+ Xc_indptr_ptr = INTEGER(Xc_indptr);
2115
+ }
2116
+
2117
+ IsoForest* model_ptr = nullptr;
2118
+ ExtIsoForest* ext_model_ptr = nullptr;
2119
+ TreesIndexer* indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
2120
+ if (is_extended)
2121
+ ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model));
2122
+ else
2123
+ model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model));
2124
+
2125
+ MissingAction missing_action = is_extended?
2126
+ ext_model_ptr->missing_action
2127
+ :
2128
+ model_ptr->missing_action;
2129
+ if (missing_action != Fail)
2130
+ {
2131
+ if (X_num.size()) numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, X_num.size(), Xcpp, nthreads);
2132
+ if (Xc.size()) Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, nthreads);
2133
+ }
2134
+
2135
+ std::unique_ptr<TreesIndexer> new_indexer(is_altrepped? nullptr : (new TreesIndexer(*indexer)));
2136
+ TreesIndexer *indexer_use = is_altrepped? indexer : new_indexer.get();
2137
+
2138
+ /* Note: if using an altrepped pointer, the indexer is modified in-place. If that fails,
2139
+ it will end up overwitten, with the previous references taken away. OTOH, if using
2140
+ a pointer + serialized, and it fails, it should not overwrite anything, and thus
2141
+ should not re-assign here immediately. */
2142
+ if (is_altrepped) {
2143
+ lst_metadata["reference_names"] = rnames;
2144
+ }
2145
+
2146
+ set_reference_points(model_ptr, ext_model_ptr, indexer_use,
2147
+ with_distances,
2148
+ numeric_data_ptr, categ_data_ptr,
2149
+ true, (size_t)0, (size_t)0,
2150
+ Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
2151
+ (double*)NULL, (int*)NULL, (int*)NULL,
2152
+ nrows, nthreads);
2153
+
2154
+ if (!is_altrepped) {
2155
+ lst_out["ser"] = serialize_cpp_obj(new_indexer.get());
2156
+ *indexer = std::move(*new_indexer);
2157
+ lst_metadata["reference_names"] = rnames;
2158
+ }
2159
+ }
2160
+
2161
+ // [[Rcpp::export(rng = false)]]
2162
+ bool check_node_indexer_has_references(SEXP indexer_R_ptr)
2163
+ {
2164
+ const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr);
2165
+ if (!indexer) return false;
2166
+ return !(indexer->indices.front().reference_points.empty());
2167
+ }
2168
+
2169
+ // [[Rcpp::export(rng = false)]]
2170
+ int get_num_references(SEXP indexer_R_ptr)
2171
+ {
2172
+ const TreesIndexer *indexer = static_cast<const TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
2173
+ if (!indexer || indexer->indices.empty()) return 0;
2174
+ return indexer->indices.front().reference_points.size();
2175
+ }
2176
+
2177
+ // [[Rcpp::export(rng = false)]]
2178
+ SEXP get_null_R_pointer_internal(bool altrepped)
2179
+ {
2180
+ if (!altrepped) {
2181
+ return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue);
2182
+ }
2183
+ else {
2184
+ SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue));
2185
+ SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue));
2186
+ UNPROTECT(2);
2187
+ return out;
2188
+ }
2189
+ }
2190
+
2191
+ /* This library will use different code paths for opening a file path
2192
+ in order to support non-ASCII characters, depending on compiler and
2193
+ platform support. */
2194
+ #if (defined(_WIN32) || defined(_WIN64))
2195
+ # if defined(__GNUC__) && (__GNUC__ >= 5)
2196
+ # define USE_CODECVT
2197
+ # define TAKE_AS_UTF8 true
2198
+ # elif !defined(_FOR_CRAN)
2199
+ # define USE_RC_FOPEN
2200
+ # define TAKE_AS_UTF8 false
2201
+ # else
2202
+ # define USE_SIMPLE_FOPEN
2203
+ # define TAKE_AS_UTF8 false
2204
+ # endif
2205
+ #else
2206
+ # define USE_SIMPLE_FOPEN
2207
+ # define TAKE_AS_UTF8 false
2208
+ #endif
2209
+
2210
+ /* Now the actual implementations */
2211
+ #ifdef USE_CODECVT
2212
+ /* https://stackoverflow.com/questions/2573834/c-convert-string-or-char-to-wstring-or-wchar-t */
2213
+ /* */
2214
+ #include <locale>
2215
+ #include <codecvt>
2216
+ #include <string>
2217
+ FILE* R_fopen(Rcpp::CharacterVector fname, const char *mode)
2218
+ {
2219
+ Rcpp::String s(fname[0], CE_UTF8);
2220
+ std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> converter;
2221
+ std::wstring wide = converter.from_bytes(s.get_cstring());
2222
+ std::string mode__(mode);
2223
+ std::wstring mode_ = converter.from_bytes(mode__);
2224
+ return _wfopen(wide.c_str(), mode_.c_str());
2225
+ }
2226
+ #endif
2227
+
2228
+ #ifdef USE_RC_FOPEN
2229
+ extern "C" {
2230
+ FILE *RC_fopen(const SEXP fn, const char *mode, const Rboolean expand);
2231
+ }
2232
+ FILE* R_fopen(Rcpp::CharacterVector fname, const char *mode)
2233
+ {
2234
+ return RC_fopen(fname[0], mode, FALSE);
2235
+ }
2236
+ #endif
2237
+
2238
+ #ifdef USE_SIMPLE_FOPEN
2239
+ FILE* R_fopen(Rcpp::CharacterVector fname, const char *mode)
2240
+ {
2241
+ return fopen(fname[0], mode);
2242
+ }
2243
+ #endif
2244
+
2245
+ class FileOpener
2246
+ {
2247
+ public:
2248
+ FILE *handle = NULL;
2249
+ FileOpener(const SEXP fname, const char *mode)
2250
+ {
2251
+ if (this->handle != NULL)
2252
+ this->close_file();
2253
+ this->handle = R_fopen(fname, mode);
2254
+ }
2255
+ FILE *get_handle()
2256
+ {
2257
+ return this->handle;
2258
+ }
2259
+ void close_file()
2260
+ {
2261
+ if (this->handle != NULL) {
2262
+ fclose(this->handle);
2263
+ this->handle = NULL;
2264
+ }
2265
+ }
2266
+ ~FileOpener()
2267
+ {
2268
+ this->close_file();
2269
+ }
2270
+ };
2271
+
2272
+ // [[Rcpp::export]]
2273
+ void serialize_to_file
2274
+ (
2275
+ Rcpp::RawVector serialized_obj,
2276
+ Rcpp::RawVector serialized_imputer,
2277
+ Rcpp::RawVector serialized_indexer,
2278
+ bool is_extended,
2279
+ Rcpp::RawVector metadata,
2280
+ Rcpp::CharacterVector fname
2281
+ )
2282
+ {
2283
+ FileOpener file_(fname[0], "wb");
2284
+ FILE *output_file = file_.get_handle();
2285
+ serialize_combined(
2286
+ is_extended? nullptr : (char*)RAW(serialized_obj),
2287
+ is_extended? (char*)RAW(serialized_obj) : nullptr,
2288
+ serialized_imputer.size()? (char*)RAW(serialized_imputer) : nullptr,
2289
+ serialized_indexer.size()? (char*)RAW(serialized_indexer) : nullptr,
2290
+ metadata.size()? (char*)RAW(metadata) : nullptr,
2291
+ metadata.size(),
2292
+ output_file
2293
+ );
2294
+ }
2295
+
2296
+ // [[Rcpp::export]]
2297
+ Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname, bool lazy_serialization)
2298
+ {
2299
+ Rcpp::List out = Rcpp::List::create(
2300
+ Rcpp::_["model"] = R_NilValue,
2301
+ Rcpp::_["imputer"] = R_NilValue,
2302
+ Rcpp::_["indexer"] = R_NilValue,
2303
+ Rcpp::_["metadata"] = R_NilValue
2304
+ );
2305
+
2306
+ if (!lazy_serialization) {
2307
+ out["model"] = Rcpp::List::create(
2308
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2309
+ Rcpp::_["ser"] = R_NilValue
2310
+ );
2311
+ out["imputer"] = Rcpp::List::create(
2312
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2313
+ Rcpp::_["ser"] = R_NilValue
2314
+ );
2315
+ out["indexer"] = Rcpp::List::create(
2316
+ Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false),
2317
+ Rcpp::_["ser"] = R_NilValue
2318
+ );
2319
+ }
2320
+
2321
+ FileOpener file_(fname[0], "rb");
2322
+ FILE *input_file = file_.get_handle();
2323
+
2324
+ bool is_isotree_model;
2325
+ bool is_compatible;
2326
+ bool has_combined_objects;
2327
+ bool has_IsoForest;
2328
+ bool has_ExtIsoForest;
2329
+ bool has_Imputer;
2330
+ bool has_Indexer;
2331
+ bool has_metadata;
2332
+ size_t size_metadata;
2333
+
2334
+ inspect_serialized_object(
2335
+ input_file,
2336
+ is_isotree_model,
2337
+ is_compatible,
2338
+ has_combined_objects,
2339
+ has_IsoForest,
2340
+ has_ExtIsoForest,
2341
+ has_Imputer,
2342
+ has_Indexer,
2343
+ has_metadata,
2344
+ size_metadata
2345
+ );
2346
+
2347
+ if (!is_isotree_model || !has_combined_objects)
2348
+ Rcpp::stop("Input file is not a serialized isotree model.\n");
2349
+ if (!is_compatible)
2350
+ Rcpp::stop("Model file format is incompatible.\n");
2351
+ if (!size_metadata)
2352
+ Rcpp::stop("Input file does not contain metadata.\n");
2353
+
2354
+ out["metadata"] = Rcpp::unwindProtect(alloc_RawVec, (void*)&size_metadata);
2355
+
2356
+ std::unique_ptr<IsoForest> model(new IsoForest());
2357
+ std::unique_ptr<ExtIsoForest> model_ext(new ExtIsoForest());
2358
+ std::unique_ptr<Imputer> imputer(new Imputer());
2359
+ std::unique_ptr<TreesIndexer> indexer(new TreesIndexer());
2360
+
2361
+ IsoForest *ptr_model = NULL;
2362
+ ExtIsoForest *ptr_model_ext = NULL;
2363
+ Imputer *ptr_imputer = NULL;
2364
+ TreesIndexer *ptr_indexer = NULL;
2365
+ char *ptr_metadata = (char*)RAW(out["metadata"]);
2366
+
2367
+ if (has_IsoForest)
2368
+ ptr_model = model.get();
2369
+ if (has_ExtIsoForest)
2370
+ ptr_model_ext = model_ext.get();
2371
+ if (has_Imputer)
2372
+ ptr_imputer = imputer.get();
2373
+ if (has_Indexer)
2374
+ ptr_indexer = indexer.get();
2375
+
2376
+ deserialize_combined(
2377
+ input_file,
2378
+ ptr_model,
2379
+ ptr_model_ext,
2380
+ ptr_imputer,
2381
+ ptr_indexer,
2382
+ ptr_metadata
2383
+ );
2384
+
2385
+ if (lazy_serialization)
2386
+ {
2387
+ if (has_IsoForest)
2388
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, &model);
2389
+ else
2390
+ out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, &model_ext);
2391
+
2392
+ if (has_Imputer)
2393
+ out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, &imputer);
2394
+ else
2395
+ out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2396
+
2397
+ if (has_Imputer)
2398
+ out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, &indexer);
2399
+ else
2400
+ out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr);
2401
+ }
2402
+
2403
+ else
2404
+ {
2405
+ Rcpp::List tmp_model = out["model"];
2406
+ Rcpp::List tmp_imputer = out["imputer"];
2407
+ Rcpp::List tmp_indexer = out["indexer"];
2408
+
2409
+ if (has_IsoForest)
2410
+ tmp_model["ser"] = serialize_cpp_obj(model.get());
2411
+ else
2412
+ tmp_model["ser"] = serialize_cpp_obj(model_ext.get());
2413
+
2414
+ if (has_Imputer)
2415
+ tmp_imputer["ser"] = serialize_cpp_obj(imputer.get());
2416
+
2417
+ if (has_Indexer)
2418
+ tmp_indexer["ser"] = serialize_cpp_obj(indexer.get());
2419
+
2420
+ if (has_IsoForest) {
2421
+ tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get());
2422
+ model.release();
2423
+ }
2424
+ else {
2425
+ tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get());
2426
+ model_ext.release();
2427
+ }
2428
+ if (has_Imputer) {
2429
+ tmp_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get());
2430
+ imputer.release();
2431
+ }
2432
+ if (has_Indexer) {
2433
+ tmp_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
2434
+ indexer.release();
2435
+ }
2436
+
2437
+ out["model"] = tmp_model;
2438
+ out["imputer"] = tmp_imputer;
2439
+ out["indexer"] = tmp_indexer;
2440
+ }
2441
+
2442
+ return out;
2443
+ }
2444
+
2445
+ /* The functions below make for missing functionality in the
2446
+ 'Matrix' and 'SparseM' packages for sub-setting the data */
2447
+
2448
+ // [[Rcpp::export(rng = false)]]
2449
+ void call_sort_csc_indices(Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr)
2450
+ {
2451
+ size_t ncols_numeric = Xc_indptr.size() - 1;
2452
+ sort_csc_indices(REAL(Xc), INTEGER(Xc_ind), INTEGER(Xc_indptr), ncols_numeric);
2453
+ }
2454
+
2455
+ // [[Rcpp::export(rng = false)]]
2456
+ void call_reconstruct_csr_sliced
2457
+ (
2458
+ Rcpp::NumericVector orig_Xr, Rcpp::IntegerVector orig_Xr_indptr,
2459
+ Rcpp::NumericVector rec_Xr, Rcpp::IntegerVector rec_Xr_indptr,
2460
+ size_t nrows
2461
+ )
2462
+ {
2463
+ reconstruct_csr_sliced<double, int>(
2464
+ REAL(orig_Xr), INTEGER(orig_Xr_indptr),
2465
+ REAL(rec_Xr), INTEGER(rec_Xr_indptr),
2466
+ nrows
2467
+ );
2468
+ }
2469
+
2470
+ // [[Rcpp::export(rng = false)]]
2471
+ void call_reconstruct_csr_with_categ
2472
+ (
2473
+ Rcpp::NumericVector orig_Xr, Rcpp::IntegerVector orig_Xr_ind, Rcpp::IntegerVector orig_Xr_indptr,
2474
+ Rcpp::NumericVector rec_Xr, Rcpp::IntegerVector rec_Xr_ind, Rcpp::IntegerVector rec_Xr_indptr,
2475
+ Rcpp::IntegerVector rec_X_cat,
2476
+ Rcpp::IntegerVector cols_numeric, Rcpp::IntegerVector cols_categ,
2477
+ size_t nrows, size_t ncols
2478
+ )
2479
+ {
2480
+ reconstruct_csr_with_categ<double, int, int>(
2481
+ REAL(orig_Xr), INTEGER(orig_Xr_ind), INTEGER(orig_Xr_indptr),
2482
+ REAL(rec_Xr), INTEGER(rec_Xr_ind), INTEGER(rec_Xr_indptr),
2483
+ INTEGER(rec_X_cat), true,
2484
+ INTEGER(cols_numeric), INTEGER(cols_categ),
2485
+ nrows, ncols, cols_numeric.size(), cols_categ.size()
2486
+ );
2487
+ }
2488
+
2489
+ // [[Rcpp::export(rng = false)]]
2490
+ Rcpp::NumericVector deepcopy_vector(Rcpp::NumericVector inp)
2491
+ {
2492
+ return Rcpp::NumericVector(inp.begin(), inp.end());
2493
+ }
2494
+
2495
+ Rcpp::IntegerMatrix csc_to_dense_int
2496
+ (
2497
+ Rcpp::NumericVector Xc,
2498
+ Rcpp::IntegerVector Xc_ind,
2499
+ Rcpp::IntegerVector Xc_indptr,
2500
+ size_t nrows
2501
+ )
2502
+ {
2503
+ size_t ncols = Xc_indptr.size() - 1;
2504
+ Rcpp::IntegerMatrix out_(nrows, ncols);
2505
+ int *restrict out = INTEGER(out_);
2506
+ for (size_t col = 0; col < ncols; col++)
2507
+ {
2508
+ for (auto ix = Xc_indptr[col]; ix < Xc_indptr[col+1]; ix++)
2509
+ out[(size_t)Xc_ind[ix] + col*nrows]
2510
+ =
2511
+ (Xc[ix] >= 0 && !ISNAN(Xc[ix]))?
2512
+ (int)Xc[ix] : (int)(-1);
2513
+ }
2514
+ return out_;
2515
+ }
2516
+
2517
+ template <class real_vec, class int_vec>
2518
+ Rcpp::IntegerMatrix csr_to_dense_int
2519
+ (
2520
+ real_vec Xr,
2521
+ int_vec Xr_ind,
2522
+ int_vec Xr_indptr,
2523
+ int ncols
2524
+ )
2525
+ {
2526
+ size_t nrows = Xr_indptr.size() - 1;
2527
+ size_t matrix_dims[] = {nrows, (size_t)ncols};
2528
+ Rcpp::IntegerMatrix out_ = Rcpp::unwindProtect(safe_int_matrix, (void*)matrix_dims);
2529
+ int *restrict out = INTEGER(out_);
2530
+ for (size_t row = 0; row < nrows; row++)
2531
+ {
2532
+ for (auto ix = Xr_indptr[row]; ix < Xr_indptr[row+1]; ix++)
2533
+ out[row + (size_t)Xr_ind[ix]*nrows]
2534
+ =
2535
+ (Xr[ix] >= 0 && !ISNAN(Xr[ix]))?
2536
+ (int)Xr[ix] : (int)(-1);
2537
+ }
2538
+ return out_;
2539
+ }
2540
+
2541
+ // [[Rcpp::export(rng = false)]]
2542
+ Rcpp::List call_take_cols_by_slice_csr
2543
+ (
2544
+ Rcpp::NumericVector Xr_,
2545
+ Rcpp::IntegerVector Xr_ind_,
2546
+ Rcpp::IntegerVector Xr_indptr,
2547
+ int ncols_take,
2548
+ bool as_dense
2549
+ )
2550
+ {
2551
+ /* Indices need to be sorted beforehand */
2552
+ double *restrict Xr = REAL(Xr_);
2553
+ int *restrict Xr_ind = INTEGER(Xr_ind_);
2554
+ size_t nrows = Xr_indptr.size() - 1;
2555
+ Rcpp::IntegerVector out_Xr_indptr(nrows+1);
2556
+ out_Xr_indptr[0] = 0;
2557
+ size_t total_size = 0;
2558
+ for (size_t row = 0; row < nrows; row++)
2559
+ {
2560
+ for (auto col = Xr_indptr[row]; col < Xr_indptr[row+1]; col++)
2561
+ total_size += Xr_ind[col] < ncols_take;
2562
+ out_Xr_indptr[row+1] = total_size;
2563
+ }
2564
+
2565
+ Rcpp::NumericVector out_Xr_(total_size);
2566
+ Rcpp::IntegerVector out_Xr_ind_(total_size);
2567
+ double *restrict out_Xr = REAL(out_Xr_);
2568
+ int *restrict out_Xr_ind = INTEGER(out_Xr_ind_);
2569
+
2570
+ size_t n_this;
2571
+ for (size_t row = 0; row < nrows; row++)
2572
+ {
2573
+ n_this = out_Xr_indptr[row+1] - out_Xr_indptr[row];
2574
+ if (n_this) {
2575
+ std::copy(Xr + Xr_indptr[row],
2576
+ Xr + Xr_indptr[row] + n_this,
2577
+ out_Xr + out_Xr_indptr[row]);
2578
+ std::copy(Xr_ind + Xr_indptr[row],
2579
+ Xr_ind + Xr_indptr[row] + n_this,
2580
+ out_Xr_ind + out_Xr_indptr[row]);
2581
+ }
2582
+ }
2583
+
2584
+ if (!as_dense)
2585
+ return Rcpp::List::create(
2586
+ Rcpp::_["Xr"] = out_Xr_,
2587
+ Rcpp::_["Xr_ind"] = out_Xr_ind_,
2588
+ Rcpp::_["Xr_indptr"] = out_Xr_indptr
2589
+ );
2590
+ else
2591
+ return Rcpp::List::create(
2592
+ Rcpp::_["X_cat"] = csr_to_dense_int(out_Xr_,
2593
+ out_Xr_ind_,
2594
+ out_Xr_indptr,
2595
+ ncols_take)
2596
+ );
2597
+ }
2598
+
2599
+ // [[Rcpp::export(rng = false)]]
2600
+ Rcpp::List call_take_cols_by_index_csr
2601
+ (
2602
+ Rcpp::NumericVector Xr,
2603
+ Rcpp::IntegerVector Xr_ind,
2604
+ Rcpp::IntegerVector Xr_indptr,
2605
+ Rcpp::IntegerVector cols_take,
2606
+ bool as_dense
2607
+ )
2608
+ {
2609
+ Rcpp::List out;
2610
+ if (!as_dense) {
2611
+ out = Rcpp::List::create(
2612
+ Rcpp::_["Xr"] = R_NilValue,
2613
+ Rcpp::_["Xr_ind"] = R_NilValue,
2614
+ Rcpp::_["Xr_indptr"] = R_NilValue
2615
+ );
2616
+ }
2617
+ else {
2618
+ out = Rcpp::List::create(
2619
+ Rcpp::_["X_cat"] = R_NilValue
2620
+ );
2621
+ }
2622
+
2623
+
2624
+ /* 'cols_take' should be sorted */
2625
+ int n_take = cols_take.size();
2626
+ int nrows = Xr_indptr.size() - 1;
2627
+ std::vector<double> out_Xr;
2628
+ std::vector<int> out_Xr_ind;
2629
+ std::vector<int> out_Xr_indptr(nrows + 1);
2630
+
2631
+ int *curr_ptr;
2632
+ int *end_ptr;
2633
+ int *restrict ptr_Xr_ind = INTEGER(Xr_ind);
2634
+ int *restrict ptr_cols_take = INTEGER(cols_take);
2635
+ int *restrict ptr_cols_take_end = ptr_cols_take + n_take;
2636
+ int curr_col;
2637
+ int *search_res;
2638
+
2639
+ for (int row = 0; row < nrows; row++)
2640
+ {
2641
+ curr_ptr = ptr_Xr_ind + Xr_indptr[row];
2642
+ end_ptr = ptr_Xr_ind + Xr_indptr[row+1];
2643
+ curr_col = 0;
2644
+
2645
+ if (end_ptr == curr_ptr + 1)
2646
+ {
2647
+ search_res = std::lower_bound(ptr_cols_take, ptr_cols_take_end, *curr_ptr);
2648
+ curr_col = std::distance(ptr_cols_take, search_res);
2649
+ if (curr_col < n_take && *search_res == *curr_ptr)
2650
+ {
2651
+ out_Xr.push_back(Xr[std::distance(ptr_Xr_ind, curr_ptr)]);
2652
+ out_Xr_ind.push_back(curr_col);
2653
+ }
2654
+ }
2655
+
2656
+ else
2657
+ if (end_ptr > curr_ptr)
2658
+ {
2659
+ while (true)
2660
+ {
2661
+ curr_ptr = std::lower_bound(curr_ptr, end_ptr, ptr_cols_take[curr_col]);
2662
+
2663
+ if (curr_ptr >= end_ptr)
2664
+ {
2665
+ break;
2666
+ }
2667
+
2668
+
2669
+ else if (*curr_ptr == ptr_cols_take[curr_col])
2670
+ {
2671
+ out_Xr.push_back(Xr[std::distance(ptr_Xr_ind, curr_ptr)]);
2672
+ out_Xr_ind.push_back(curr_col);
2673
+ curr_ptr++;
2674
+ curr_col++;
2675
+
2676
+ if (curr_ptr >= end_ptr || curr_col >= n_take)
2677
+ break;
2678
+ }
2679
+
2680
+
2681
+ else
2682
+ {
2683
+ curr_col = std::distance(
2684
+ ptr_cols_take,
2685
+ std::lower_bound(ptr_cols_take + curr_col, ptr_cols_take_end, *curr_ptr)
2686
+ );
2687
+
2688
+ if (curr_col >= n_take)
2689
+ break;
2690
+
2691
+ if (curr_col == *curr_ptr) {
2692
+ out_Xr.push_back(Xr[std::distance(ptr_Xr_ind, curr_ptr)]);
2693
+ out_Xr_ind.push_back(curr_col);
2694
+ curr_ptr++;
2695
+ curr_col++;
2696
+ }
2697
+
2698
+ if (curr_ptr >= end_ptr || curr_col >= n_take)
2699
+ break;
2700
+ }
2701
+ }
2702
+ }
2703
+
2704
+ out_Xr_indptr[row+1] = out_Xr.size();
2705
+ }
2706
+
2707
+ if (!as_dense)
2708
+ {
2709
+ out["Xr"] = Rcpp::unwindProtect(safe_copy_vec, (void*)&out_Xr);
2710
+ out["Xr_ind"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xr_ind);
2711
+ out["Xr_indptr"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xr_indptr);
2712
+ }
2713
+
2714
+ else
2715
+ {
2716
+ out["X_cat"] = csr_to_dense_int(out_Xr,
2717
+ out_Xr_ind,
2718
+ out_Xr_indptr,
2719
+ n_take);
2720
+ }
2721
+
2722
+ return out;
2723
+ }
2724
+
2725
+ // [[Rcpp::export(rng = false)]]
2726
+ Rcpp::List call_take_cols_by_slice_csc
2727
+ (
2728
+ Rcpp::NumericVector Xc,
2729
+ Rcpp::IntegerVector Xc_ind,
2730
+ Rcpp::IntegerVector Xc_indptr,
2731
+ size_t ncols_take,
2732
+ bool as_dense, size_t nrows
2733
+ )
2734
+ {
2735
+ Rcpp::IntegerVector out_Xc_indptr(ncols_take+1);
2736
+ size_t total_size = Xc_indptr[ncols_take+1];
2737
+ Rcpp::NumericVector out_Xc(REAL(Xc), REAL(Xc) + total_size);
2738
+ Rcpp::IntegerVector out_Xc_ind(INTEGER(Xc_ind), INTEGER(Xc_ind) + total_size);
2739
+
2740
+ if (!as_dense)
2741
+ return Rcpp::List::create(
2742
+ Rcpp::_["Xc"] = out_Xc,
2743
+ Rcpp::_["Xc_ind"] = out_Xc_ind,
2744
+ Rcpp::_["Xc_indptr"] = out_Xc_indptr
2745
+ );
2746
+ else
2747
+ return Rcpp::List::create(
2748
+ Rcpp::_["X_cat"] = csc_to_dense_int(out_Xc,
2749
+ out_Xc_ind,
2750
+ out_Xc_indptr,
2751
+ nrows)
2752
+ );
2753
+ }
2754
+
2755
+ // [[Rcpp::export(rng = false)]]
2756
+ Rcpp::List call_take_cols_by_index_csc
2757
+ (
2758
+ Rcpp::NumericVector Xc_,
2759
+ Rcpp::IntegerVector Xc_ind_,
2760
+ Rcpp::IntegerVector Xc_indptr,
2761
+ Rcpp::IntegerVector cols_take,
2762
+ bool as_dense, size_t nrows
2763
+ )
2764
+ {
2765
+ /* 'cols_take' should be sorted */
2766
+ double *restrict Xc = REAL(Xc_);
2767
+ int *restrict Xc_ind = INTEGER(Xc_ind_);
2768
+ size_t n_take = cols_take.size();
2769
+ Rcpp::IntegerVector out_Xc_indptr(n_take+1);
2770
+ size_t total_size = 0;
2771
+
2772
+ for (size_t col = 0; col < n_take; col++)
2773
+ total_size += Xc_indptr[cols_take[col]+1] - Xc_indptr[cols_take[col]];
2774
+
2775
+ Rcpp::NumericVector out_Xc_(total_size);
2776
+ Rcpp::IntegerVector out_Xc_ind_(total_size);
2777
+ double *restrict out_Xc = REAL(out_Xc_);
2778
+ int *restrict out_Xc_ind = INTEGER(out_Xc_ind_);
2779
+
2780
+ total_size = 0;
2781
+ size_t n_this;
2782
+ out_Xc_indptr[0] = 0;
2783
+ for (size_t col = 0; col < n_take; col++)
2784
+ {
2785
+ n_this = Xc_indptr[cols_take[col]+1] - Xc_indptr[cols_take[col]];
2786
+ if (n_this) {
2787
+ std::copy(Xc + Xc_indptr[cols_take[col]],
2788
+ Xc + Xc_indptr[cols_take[col]] + n_this,
2789
+ out_Xc + total_size);
2790
+ std::copy(Xc_ind + Xc_indptr[cols_take[col]],
2791
+ Xc_ind + Xc_indptr[cols_take[col]] + n_this,
2792
+ out_Xc_ind + total_size);
2793
+ }
2794
+ total_size += n_this;
2795
+ out_Xc_indptr[col+1] = total_size;
2796
+ }
2797
+
2798
+ if (!as_dense)
2799
+ return Rcpp::List::create(
2800
+ Rcpp::_["Xc"] = out_Xc_,
2801
+ Rcpp::_["Xc_ind"] = out_Xc_ind_,
2802
+ Rcpp::_["Xc_indptr"] = out_Xc_indptr
2803
+ );
2804
+ else
2805
+ return Rcpp::List::create(
2806
+ Rcpp::_["X_cat"] = csc_to_dense_int(out_Xc_,
2807
+ out_Xc_ind_,
2808
+ out_Xc_indptr,
2809
+ nrows)
2810
+ );
2811
+ }
2812
+
2813
+ // [[Rcpp::export(rng = false)]]
2814
+ void copy_csc_cols_by_slice
2815
+ (
2816
+ Rcpp::NumericVector out_Xc_,
2817
+ Rcpp::IntegerVector out_Xc_indptr,
2818
+ Rcpp::NumericVector from_Xc_,
2819
+ Rcpp::IntegerVector from_Xc_indptr,
2820
+ size_t n_copy
2821
+ )
2822
+ {
2823
+ size_t total_size = from_Xc_indptr[n_copy+1];
2824
+ std::copy(REAL(from_Xc_), REAL(from_Xc_) + total_size, REAL(out_Xc_));
2825
+ }
2826
+
2827
+ // [[Rcpp::export(rng = false)]]
2828
+ void copy_csc_cols_by_index
2829
+ (
2830
+ Rcpp::NumericVector out_Xc_,
2831
+ Rcpp::IntegerVector out_Xc_indptr,
2832
+ Rcpp::NumericVector from_Xc_,
2833
+ Rcpp::IntegerVector from_Xc_indptr,
2834
+ Rcpp::IntegerVector cols_copy
2835
+ )
2836
+ {
2837
+ size_t n_copy = cols_copy.size();
2838
+ double *restrict out_Xc = REAL(out_Xc_);
2839
+ double *restrict from_Xc = REAL(from_Xc_);
2840
+
2841
+ for (size_t col = 0; col < n_copy; col++)
2842
+ {
2843
+ std::copy(from_Xc + from_Xc_indptr[col],
2844
+ from_Xc + from_Xc_indptr[col+1],
2845
+ out_Xc + out_Xc_indptr[cols_copy[col]]);
2846
+ }
2847
+ }
2848
+
2849
+
2850
+ // [[Rcpp::export(rng = false)]]
2851
+ Rcpp::List assign_csc_cols
2852
+ (
2853
+ Rcpp::NumericVector Xc_,
2854
+ Rcpp::IntegerVector Xc_ind_,
2855
+ Rcpp::IntegerVector Xc_indptr,
2856
+ Rcpp::IntegerVector X_cat_,
2857
+ Rcpp::IntegerVector cols_categ,
2858
+ Rcpp::IntegerVector cols_numeric,
2859
+ size_t nrows
2860
+ )
2861
+ {
2862
+ Rcpp::List out = Rcpp::List::create(
2863
+ Rcpp::_["Xc"] = R_NilValue,
2864
+ Rcpp::_["Xc_ind"] = R_NilValue,
2865
+ Rcpp::_["Xc_indptr"] = R_NilValue
2866
+ );
2867
+ size_t ncols_tot = (size_t)cols_categ.size() + (size_t)cols_numeric.size();
2868
+ std::vector<double> out_Xc;
2869
+ std::vector<int> out_Xc_ind;
2870
+ std::vector<int> out_Xc_indptr(ncols_tot + 1);
2871
+
2872
+ double *restrict Xc = REAL(Xc_);
2873
+ int *restrict Xc_ind = INTEGER(Xc_ind_);
2874
+ int *restrict X_cat = INTEGER(X_cat_);
2875
+
2876
+ hashed_set<int> cols_categ_set(INTEGER(cols_categ), INTEGER(cols_categ) + cols_categ.size());
2877
+ hashed_set<int> cols_numeric_set(INTEGER(cols_numeric), INTEGER(cols_numeric) + cols_numeric.size());
2878
+
2879
+ size_t curr_num = 0;
2880
+ size_t curr_cat = 0;
2881
+ bool has_zeros;
2882
+ size_t curr_size;
2883
+
2884
+ for (size_t col = 0; col < ncols_tot; col++)
2885
+ {
2886
+ if (is_in_set((int)col, cols_numeric_set))
2887
+ {
2888
+ std::copy(Xc + Xc_indptr[curr_num],
2889
+ Xc + Xc_indptr[curr_num+1],
2890
+ std::back_inserter(out_Xc));
2891
+ std::copy(Xc_ind + Xc_indptr[curr_num],
2892
+ Xc_ind + Xc_indptr[curr_num+1],
2893
+ std::back_inserter(out_Xc_ind));
2894
+ curr_num++;
2895
+ }
2896
+
2897
+ else if (is_in_set((int)col, cols_categ_set))
2898
+ {
2899
+ has_zeros = false;
2900
+ for (size_t row = 0; row < nrows; row++)
2901
+ if (X_cat[row + (size_t)curr_cat*nrows] == 0)
2902
+ has_zeros = true;
2903
+
2904
+ if (!has_zeros) {
2905
+ std::copy(X_cat + (size_t)curr_cat*nrows,
2906
+ X_cat + ((size_t)curr_cat+1)*nrows,
2907
+ std::back_inserter(out_Xc));
2908
+ curr_size = out_Xc_ind.size();
2909
+ out_Xc_ind.resize(curr_size + (size_t)nrows);
2910
+ std::iota(out_Xc_ind.begin() + curr_size, out_Xc_ind.end(), (int)0);
2911
+ }
2912
+
2913
+ else {
2914
+ for (size_t row = 0; row < nrows; row++) {
2915
+ if (X_cat[row + (size_t)curr_cat*nrows] > 0) {
2916
+ out_Xc.push_back(X_cat[row + (size_t)curr_cat*nrows]);
2917
+ out_Xc_ind.push_back((int)row);
2918
+ }
2919
+ }
2920
+ }
2921
+
2922
+ curr_cat++;
2923
+ }
2924
+
2925
+ out_Xc_indptr[col+1] = out_Xc.size();
2926
+ }
2927
+
2928
+
2929
+ out["Xc"] = Rcpp::unwindProtect(safe_copy_vec, (void*)&out_Xc);
2930
+ out["Xc_ind"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xc_ind);
2931
+ out["Xc_indptr"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xc_indptr);
2932
+ return out;
2933
+ }
2934
+
2935
+ /* These are helpers for dealing with large integers and R's copy-on-write semantics */
2936
+
2937
+ // [[Rcpp::export(rng = false)]]
2938
+ Rcpp::NumericVector get_empty_tmat(int nrows_)
2939
+ {
2940
+ size_t nrows = (size_t)nrows_;
2941
+ size_t tmat_size = (nrows * (nrows - (size_t)1)) / (size_t)2;
2942
+ return Rcpp::NumericVector((R_xlen_t)tmat_size);
2943
+ }
2944
+
2945
+ // [[Rcpp::export(rng = false)]]
2946
+ Rcpp::IntegerMatrix get_empty_int_mat(int nrows, int ncols)
2947
+ {
2948
+ return Rcpp::IntegerMatrix(nrows, ncols);
2949
+ }
2950
+
2951
+ // [[Rcpp::export(rng = false)]]
2952
+ Rcpp::IntegerMatrix get_null_int_mat()
2953
+ {
2954
+ return Rcpp::IntegerMatrix(0, 0);
2955
+ }
2956
+
2957
+ // [[Rcpp::export(rng = false)]]
2958
+ int get_ntrees(SEXP model_R_ptr, bool is_extended)
2959
+ {
2960
+ if (is_extended) {
2961
+ const ExtIsoForest* ext_model_ptr = static_cast<const ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2962
+ return ext_model_ptr->hplanes.size();
2963
+ }
2964
+
2965
+ else {
2966
+ const IsoForest* model_ptr = static_cast<const IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
2967
+ return model_ptr->trees.size();
2968
+ }
2969
+ }
2970
+
2971
+ // [[Rcpp::export(rng = false)]]
2972
+ SEXP deepcopy_int(SEXP x)
2973
+ {
2974
+ return Rf_ScalarInteger(Rf_asInteger(x));
2975
+ }
2976
+
2977
+ // [[Rcpp::export(rng = false)]]
2978
+ void modify_R_list_inplace(SEXP lst, int ix, SEXP el)
2979
+ {
2980
+ SET_VECTOR_ELT(lst, ix, el);
2981
+ }
2982
+
2983
+ // [[Rcpp::export(rng = false)]]
2984
+ void addto_R_list_inplace(Rcpp::List &lst, Rcpp::String nm, SEXP el)
2985
+ {
2986
+ lst[nm] = el;
2987
+ }
2988
+
2989
+
2990
+ // [[Rcpp::export(rng = false)]]
2991
+ bool R_has_openmp()
2992
+ {
2993
+ #ifdef _OPENMP
2994
+ return true;
2995
+ #else
2996
+ return false;
2997
+ #endif
850
2998
  }
851
2999
 
852
3000
  #endif /* _FOR_R */