isotree 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/LICENSE.txt +2 -2
- data/README.md +32 -14
- data/ext/isotree/ext.cpp +144 -31
- data/ext/isotree/extconf.rb +7 -7
- data/lib/isotree/isolation_forest.rb +110 -30
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +165 -27
- data/vendor/isotree/include/isotree.hpp +2111 -0
- data/vendor/isotree/include/isotree_oop.hpp +394 -0
- data/vendor/isotree/inst/COPYRIGHTS +62 -0
- data/vendor/isotree/src/RcppExports.cpp +525 -52
- data/vendor/isotree/src/Rwrapper.cpp +1931 -268
- data/vendor/isotree/src/c_interface.cpp +953 -0
- data/vendor/isotree/src/crit.hpp +4232 -0
- data/vendor/isotree/src/dist.hpp +1886 -0
- data/vendor/isotree/src/exp_depth_table.hpp +134 -0
- data/vendor/isotree/src/extended.hpp +1444 -0
- data/vendor/isotree/src/external_facing_generic.hpp +399 -0
- data/vendor/isotree/src/fit_model.hpp +2401 -0
- data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
- data/vendor/isotree/src/helpers_iforest.hpp +813 -0
- data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
- data/vendor/isotree/src/indexer.cpp +515 -0
- data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
- data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
- data/vendor/isotree/src/isoforest.hpp +1659 -0
- data/vendor/isotree/src/isotree.hpp +1804 -392
- data/vendor/isotree/src/isotree_exportable.hpp +99 -0
- data/vendor/isotree/src/merge_models.cpp +159 -16
- data/vendor/isotree/src/mult.hpp +1321 -0
- data/vendor/isotree/src/oop_interface.cpp +842 -0
- data/vendor/isotree/src/oop_interface.hpp +278 -0
- data/vendor/isotree/src/other_helpers.hpp +219 -0
- data/vendor/isotree/src/predict.hpp +1932 -0
- data/vendor/isotree/src/python_helpers.hpp +134 -0
- data/vendor/isotree/src/ref_indexer.hpp +154 -0
- data/vendor/isotree/src/robinmap/LICENSE +21 -0
- data/vendor/isotree/src/robinmap/README.md +483 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
- data/vendor/isotree/src/serialize.cpp +4300 -139
- data/vendor/isotree/src/sql.cpp +141 -59
- data/vendor/isotree/src/subset_models.cpp +174 -0
- data/vendor/isotree/src/utils.hpp +3808 -0
- data/vendor/isotree/src/xoshiro.hpp +467 -0
- data/vendor/isotree/src/ziggurat.hpp +405 -0
- metadata +38 -104
- data/vendor/cereal/LICENSE +0 -24
- data/vendor/cereal/README.md +0 -85
- data/vendor/cereal/include/cereal/access.hpp +0 -351
- data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
- data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
- data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
- data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
- data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
- data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
- data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
- data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
- data/vendor/cereal/include/cereal/details/util.hpp +0 -84
- data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
- data/vendor/cereal/include/cereal/macros.hpp +0 -154
- data/vendor/cereal/include/cereal/specialize.hpp +0 -139
- data/vendor/cereal/include/cereal/types/array.hpp +0 -79
- data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
- data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
- data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
- data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
- data/vendor/cereal/include/cereal/types/common.hpp +0 -129
- data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
- data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
- data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
- data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
- data/vendor/cereal/include/cereal/types/list.hpp +0 -62
- data/vendor/cereal/include/cereal/types/map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
- data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
- data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
- data/vendor/cereal/include/cereal/types/set.hpp +0 -103
- data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
- data/vendor/cereal/include/cereal/types/string.hpp +0 -61
- data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
- data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
- data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
- data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
- data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
- data/vendor/cereal/include/cereal/version.hpp +0 -52
- data/vendor/isotree/src/Makevars +0 -4
- data/vendor/isotree/src/crit.cpp +0 -912
- data/vendor/isotree/src/dist.cpp +0 -749
- data/vendor/isotree/src/extended.cpp +0 -790
- data/vendor/isotree/src/fit_model.cpp +0 -1090
- data/vendor/isotree/src/helpers_iforest.cpp +0 -324
- data/vendor/isotree/src/isoforest.cpp +0 -771
- data/vendor/isotree/src/mult.cpp +0 -607
- data/vendor/isotree/src/predict.cpp +0 -853
- data/vendor/isotree/src/utils.cpp +0 -1566
|
@@ -18,11 +18,29 @@
|
|
|
18
18
|
* [5] https://sourceforge.net/projects/iforest/
|
|
19
19
|
* [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
|
|
20
20
|
* [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
|
|
21
|
-
* [8] Cortes, David.
|
|
22
|
-
*
|
|
21
|
+
* [8] Cortes, David.
|
|
22
|
+
* "Distance approximation using Isolation Forests."
|
|
23
|
+
* arXiv preprint arXiv:1910.12362 (2019).
|
|
24
|
+
* [9] Cortes, David.
|
|
25
|
+
* "Imputing missing values with unsupervised random trees."
|
|
26
|
+
* arXiv preprint arXiv:1911.06646 (2019).
|
|
27
|
+
* [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
|
|
28
|
+
* [11] Cortes, David.
|
|
29
|
+
* "Revisiting randomized choices in isolation forests."
|
|
30
|
+
* arXiv preprint arXiv:2110.13402 (2021).
|
|
31
|
+
* [12] Guha, Sudipto, et al.
|
|
32
|
+
* "Robust random cut forest based anomaly detection on streams."
|
|
33
|
+
* International conference on machine learning. PMLR, 2016.
|
|
34
|
+
* [13] Cortes, David.
|
|
35
|
+
* "Isolation forests: looking beyond tree depth."
|
|
36
|
+
* arXiv preprint arXiv:2111.11639 (2021).
|
|
37
|
+
* [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
|
|
38
|
+
* "Isolation kernel and its effect on SVM"
|
|
39
|
+
* Proceedings of the 24th ACM SIGKDD
|
|
40
|
+
* International Conference on Knowledge Discovery & Data Mining. 2018.
|
|
23
41
|
*
|
|
24
42
|
* BSD 2-Clause License
|
|
25
|
-
* Copyright (c)
|
|
43
|
+
* Copyright (c) 2019-2022, David Cortes
|
|
26
44
|
* All rights reserved.
|
|
27
45
|
* Redistribution and use in source and binary forms, with or without
|
|
28
46
|
* modification, are permitted provided that the following conditions are met:
|
|
@@ -45,76 +63,138 @@
|
|
|
45
63
|
#ifdef _FOR_R
|
|
46
64
|
|
|
47
65
|
#include <Rcpp.h>
|
|
48
|
-
|
|
66
|
+
#include <Rcpp/unwindProtect.h>
|
|
67
|
+
// [[Rcpp::plugins(unwindProtect)]]
|
|
68
|
+
#include <Rinternals.h>
|
|
49
69
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
#
|
|
53
|
-
#include <cereal/types/vector.hpp>
|
|
54
|
-
#include <sstream>
|
|
55
|
-
#include <string>
|
|
70
|
+
#ifndef _FOR_R
|
|
71
|
+
#define FOR_R
|
|
72
|
+
#endif
|
|
56
73
|
|
|
57
74
|
/* This is the package's header */
|
|
58
75
|
#include "isotree.hpp"
|
|
59
76
|
|
|
77
|
+
/* Library is templated, base R comes with only these 2 types though */
|
|
78
|
+
#include "headers_joined.hpp"
|
|
79
|
+
#define real_t double
|
|
80
|
+
#define sparse_ix int
|
|
81
|
+
#include "instantiate_template_headers.hpp"
|
|
82
|
+
|
|
83
|
+
/* For imputing CSR matrices with differing columns from input */
|
|
84
|
+
#include "other_helpers.hpp"
|
|
85
|
+
|
|
86
|
+
/* Note: the R version calls the 'sort_csc_indices' templated function,
|
|
87
|
+
so it's not enough to just include 'isotree_exportable.hpp' and let
|
|
88
|
+
the templates be instantiated elsewhere. */
|
|
89
|
+
|
|
90
|
+
#define throw_mem_err() Rcpp::stop("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n")
|
|
91
|
+
|
|
92
|
+
SEXP alloc_RawVec(void *data)
|
|
93
|
+
{
|
|
94
|
+
size_t vecsize = *(size_t*)data;
|
|
95
|
+
if (unlikely(vecsize > (size_t)std::numeric_limits<R_xlen_t>::max()))
|
|
96
|
+
Rcpp::stop("Object is too big for R to handle.");
|
|
97
|
+
return Rcpp::RawVector((R_xlen_t)vecsize);
|
|
98
|
+
}
|
|
99
|
+
|
|
100
|
+
SEXP safe_copy_vec(void *data)
|
|
101
|
+
{
|
|
102
|
+
std::vector<double> *vec = (std::vector<double>*)data;
|
|
103
|
+
return Rcpp::NumericVector(vec->begin(), vec->end());
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
SEXP safe_copy_intvec(void *data)
|
|
107
|
+
{
|
|
108
|
+
std::vector<int> *vec = (std::vector<int>*)data;
|
|
109
|
+
return Rcpp::IntegerVector(vec->begin(), vec->end());
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
SEXP safe_int_matrix(void *dims)
|
|
113
|
+
{
|
|
114
|
+
size_t *dims_ = (size_t*)dims;
|
|
115
|
+
size_t nrows = dims_[0];
|
|
116
|
+
size_t ncols = dims_[1];
|
|
117
|
+
return Rcpp::IntegerMatrix(nrows, ncols);
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
template <class Model>
|
|
121
|
+
SEXP safe_XPtr(void *model_ptr)
|
|
122
|
+
{
|
|
123
|
+
return Rcpp::XPtr<Model>((Model*)model_ptr, true);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
SEXP safe_errlist(void *ignored)
|
|
127
|
+
{
|
|
128
|
+
return Rcpp::List::create(Rcpp::_["err"] = Rcpp::LogicalVector::create(1));
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
SEXP safe_FALSE(void *ignored)
|
|
132
|
+
{
|
|
133
|
+
return Rcpp::LogicalVector::create(0);
|
|
134
|
+
}
|
|
135
|
+
|
|
136
|
+
Rcpp::RawVector resize_vec(Rcpp::RawVector inp, size_t new_size)
|
|
137
|
+
{
|
|
138
|
+
Rcpp::RawVector out = Rcpp::unwindProtect(alloc_RawVec, (void*)&new_size);
|
|
139
|
+
memcpy(RAW(out), RAW(inp), std::min((size_t)inp.size(), new_size));
|
|
140
|
+
return out;
|
|
141
|
+
}
|
|
142
|
+
|
|
60
143
|
/* for model serialization and re-usage in R */
|
|
61
144
|
/* https://stackoverflow.com/questions/18474292/how-to-handle-c-internal-data-structure-in-r-in-order-to-allow-save-load */
|
|
62
145
|
/* this extra comment below the link is a workaround for Rcpp issue 675 in GitHub, do not remove it */
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
Rcpp::RawVector serialize_cpp_obj(T *model_outputs)
|
|
146
|
+
template <class Model>
|
|
147
|
+
Rcpp::RawVector serialize_cpp_obj(const Model *model_outputs)
|
|
66
148
|
{
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
Rcpp::Rcerr << "Error: model is too big to serialize, resulting object will not be usable.\n" << std::endl;
|
|
77
|
-
return Rcpp::RawVector();
|
|
78
|
-
}
|
|
79
|
-
Rcpp::RawVector retval((size_t) vec_size);
|
|
80
|
-
ss.seekg(0, ss.beg);
|
|
81
|
-
ss.read(reinterpret_cast<char*>(&retval[0]), retval.size());
|
|
82
|
-
return retval;
|
|
149
|
+
size_t serialized_size = determine_serialized_size(*model_outputs);
|
|
150
|
+
if (unlikely(!serialized_size))
|
|
151
|
+
Rcpp::stop("Unexpected error.");
|
|
152
|
+
if (unlikely(serialized_size > (size_t)std::numeric_limits<R_xlen_t>::max()))
|
|
153
|
+
Rcpp::stop("Resulting model is too large for R to handle.");
|
|
154
|
+
Rcpp::RawVector out = Rcpp::unwindProtect(alloc_RawVec, (void*)&serialized_size);
|
|
155
|
+
char *out_ = (char*)RAW(out);
|
|
156
|
+
serialize_isotree(*model_outputs, out_);
|
|
157
|
+
return out;
|
|
83
158
|
}
|
|
84
159
|
|
|
85
|
-
template <class
|
|
160
|
+
template <class Model>
|
|
86
161
|
SEXP deserialize_cpp_obj(Rcpp::RawVector src)
|
|
87
162
|
{
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
return Rcpp::XPtr<T>(model_outputs.release(), true);
|
|
163
|
+
if (unlikely(!src.size()))
|
|
164
|
+
Rcpp::stop("Unexpected error.");
|
|
165
|
+
std::unique_ptr<Model> out(new Model());
|
|
166
|
+
const char *inp = (const char*)RAW(src);
|
|
167
|
+
deserialize_isotree(*out, inp);
|
|
168
|
+
SEXP out_ = Rcpp::unwindProtect(safe_XPtr<Model>, out.get());
|
|
169
|
+
out.release();
|
|
170
|
+
return out_;
|
|
97
171
|
}
|
|
98
172
|
|
|
99
|
-
// [[Rcpp::export]]
|
|
173
|
+
// [[Rcpp::export(rng = false)]]
|
|
100
174
|
SEXP deserialize_IsoForest(Rcpp::RawVector src)
|
|
101
175
|
{
|
|
102
176
|
return deserialize_cpp_obj<IsoForest>(src);
|
|
103
177
|
}
|
|
104
178
|
|
|
105
|
-
// [[Rcpp::export]]
|
|
179
|
+
// [[Rcpp::export(rng = false)]]
|
|
106
180
|
SEXP deserialize_ExtIsoForest(Rcpp::RawVector src)
|
|
107
181
|
{
|
|
108
182
|
return deserialize_cpp_obj<ExtIsoForest>(src);
|
|
109
183
|
}
|
|
110
184
|
|
|
111
|
-
// [[Rcpp::export]]
|
|
185
|
+
// [[Rcpp::export(rng = false)]]
|
|
112
186
|
SEXP deserialize_Imputer(Rcpp::RawVector src)
|
|
113
187
|
{
|
|
114
188
|
return deserialize_cpp_obj<Imputer>(src);
|
|
115
189
|
}
|
|
116
190
|
|
|
117
|
-
// [[Rcpp::export]]
|
|
191
|
+
// [[Rcpp::export(rng = false)]]
|
|
192
|
+
SEXP deserialize_Indexer(Rcpp::RawVector src)
|
|
193
|
+
{
|
|
194
|
+
return deserialize_cpp_obj<TreesIndexer>(src);
|
|
195
|
+
}
|
|
196
|
+
|
|
197
|
+
// [[Rcpp::export(rng = false)]]
|
|
118
198
|
Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
|
|
119
199
|
{
|
|
120
200
|
return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL);
|
|
@@ -123,79 +203,87 @@ Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model)
|
|
|
123
203
|
double* set_R_nan_as_C_nan(double *x, size_t n, std::vector<double> &v, int nthreads)
|
|
124
204
|
{
|
|
125
205
|
v.assign(x, x + n);
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
if (isnan(v[i]))
|
|
129
|
-
v[i] = NAN;
|
|
206
|
+
for (size_t i = 0; i < n; i++)
|
|
207
|
+
if (unlikely(std::isnan(v[i]))) v[i] = NAN;
|
|
130
208
|
return v.data();
|
|
131
209
|
}
|
|
132
210
|
|
|
211
|
+
double* set_R_nan_as_C_nan(double *x, size_t n, Rcpp::NumericVector &v, int nthreads)
|
|
212
|
+
{
|
|
213
|
+
v = Rcpp::NumericVector(x, x + n);
|
|
214
|
+
for (size_t i = 0; i < n; i++)
|
|
215
|
+
if (unlikely(std::isnan(v[i]))) v[i] = NAN;
|
|
216
|
+
return REAL(v);
|
|
217
|
+
}
|
|
218
|
+
|
|
133
219
|
double* set_R_nan_as_C_nan(double *x, size_t n, int nthreads)
|
|
134
220
|
{
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
if (isnan(x[i]))
|
|
138
|
-
x[i] = NAN;
|
|
221
|
+
for (size_t i = 0; i < n; i++)
|
|
222
|
+
if (unlikely(std::isnan(x[i]))) x[i] = NAN;
|
|
139
223
|
return x;
|
|
140
224
|
}
|
|
141
225
|
|
|
142
|
-
// [[Rcpp::export]]
|
|
226
|
+
// [[Rcpp::export(rng = false)]]
|
|
143
227
|
Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat,
|
|
144
228
|
Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
|
|
145
229
|
Rcpp::NumericVector sample_weights, Rcpp::NumericVector col_weights,
|
|
146
230
|
size_t nrows, size_t ncols_numeric, size_t ncols_categ, size_t ndim, size_t ntry,
|
|
147
231
|
Rcpp::CharacterVector coef_type, bool coef_by_prop, bool with_replacement, bool weight_as_sample,
|
|
148
|
-
size_t sample_size, size_t ntrees, size_t max_depth, bool limit_depth,
|
|
149
|
-
bool penalize_range, bool
|
|
232
|
+
size_t sample_size, size_t ntrees, size_t max_depth, size_t ncols_per_tree, bool limit_depth,
|
|
233
|
+
bool penalize_range, bool standardize_data,
|
|
234
|
+
Rcpp::CharacterVector scoring_metric, bool fast_bratio,
|
|
235
|
+
bool calc_dist, bool standardize_dist, bool sq_dist,
|
|
150
236
|
bool calc_depth, bool standardize_depth, bool weigh_by_kurt,
|
|
151
|
-
double
|
|
152
|
-
double
|
|
237
|
+
double prob_pick_by_gain_pl, double prob_pick_by_gain_avg,
|
|
238
|
+
double prob_pick_by_full_gain, double prob_pick_by_dens,
|
|
239
|
+
double prob_pick_col_by_range, double prob_pick_col_by_var,
|
|
240
|
+
double prob_pick_col_by_kurt, double min_gain,
|
|
153
241
|
Rcpp::CharacterVector cat_split_type, Rcpp::CharacterVector new_cat_action,
|
|
154
242
|
Rcpp::CharacterVector missing_action, bool all_perm,
|
|
155
243
|
bool build_imputer, bool output_imputations, size_t min_imp_obs,
|
|
156
244
|
Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows,
|
|
157
|
-
int random_seed, bool
|
|
245
|
+
int random_seed, bool use_long_double, int nthreads)
|
|
158
246
|
{
|
|
159
247
|
double* numeric_data_ptr = NULL;
|
|
160
248
|
int* categ_data_ptr = NULL;
|
|
161
249
|
int* ncat_ptr = NULL;
|
|
162
250
|
double* Xc_ptr = NULL;
|
|
163
|
-
|
|
164
|
-
|
|
251
|
+
int* Xc_ind_ptr = NULL;
|
|
252
|
+
int* Xc_indptr_ptr = NULL;
|
|
165
253
|
double* sample_weights_ptr = NULL;
|
|
166
254
|
double* col_weights_ptr = NULL;
|
|
167
|
-
|
|
255
|
+
Rcpp::NumericVector Xcpp;
|
|
168
256
|
|
|
169
257
|
if (X_num.size())
|
|
170
258
|
{
|
|
171
|
-
numeric_data_ptr =
|
|
172
|
-
if (Rcpp::as<std::string>(missing_action) !=
|
|
259
|
+
numeric_data_ptr = REAL(X_num);
|
|
260
|
+
if (Rcpp::as<std::string>(missing_action) != "fail")
|
|
173
261
|
numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, nrows * ncols_numeric, Xcpp, nthreads);
|
|
174
262
|
}
|
|
175
263
|
|
|
176
264
|
if (X_cat.size())
|
|
177
265
|
{
|
|
178
|
-
categ_data_ptr =
|
|
179
|
-
ncat_ptr =
|
|
266
|
+
categ_data_ptr = INTEGER(X_cat);
|
|
267
|
+
ncat_ptr = INTEGER(ncat);
|
|
180
268
|
}
|
|
181
269
|
|
|
182
270
|
if (Xc.size())
|
|
183
271
|
{
|
|
184
|
-
Xc_ptr =
|
|
185
|
-
Xc_ind_ptr =
|
|
186
|
-
Xc_indptr_ptr =
|
|
187
|
-
if (Rcpp::as<std::string>(missing_action) !=
|
|
272
|
+
Xc_ptr = REAL(Xc);
|
|
273
|
+
Xc_ind_ptr = INTEGER(Xc_ind);
|
|
274
|
+
Xc_indptr_ptr = INTEGER(Xc_indptr);
|
|
275
|
+
if (Rcpp::as<std::string>(missing_action) != "fail")
|
|
188
276
|
Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, nthreads);
|
|
189
277
|
}
|
|
190
278
|
|
|
191
279
|
if (sample_weights.size())
|
|
192
280
|
{
|
|
193
|
-
sample_weights_ptr =
|
|
281
|
+
sample_weights_ptr = REAL(sample_weights);
|
|
194
282
|
}
|
|
195
283
|
|
|
196
284
|
if (col_weights.size())
|
|
197
285
|
{
|
|
198
|
-
col_weights_ptr =
|
|
286
|
+
col_weights_ptr = REAL(col_weights);
|
|
199
287
|
}
|
|
200
288
|
|
|
201
289
|
CoefType coef_type_C = Normal;
|
|
@@ -204,47 +292,72 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
|
|
|
204
292
|
MissingAction missing_action_C = Divide;
|
|
205
293
|
UseDepthImp depth_imp_C = Higher;
|
|
206
294
|
WeighImpRows weigh_imp_rows_C = Inverse;
|
|
295
|
+
ScoringMetric scoring_metric_C = Depth;
|
|
207
296
|
|
|
208
|
-
if (Rcpp::as<std::string>(coef_type) ==
|
|
297
|
+
if (Rcpp::as<std::string>(coef_type) == "uniform")
|
|
209
298
|
{
|
|
210
299
|
coef_type_C = Uniform;
|
|
211
300
|
}
|
|
212
|
-
if (Rcpp::as<std::string>(cat_split_type) ==
|
|
301
|
+
if (Rcpp::as<std::string>(cat_split_type) == "single_categ")
|
|
213
302
|
{
|
|
214
303
|
cat_split_type_C = SingleCateg;
|
|
215
304
|
}
|
|
216
|
-
if (Rcpp::as<std::string>(new_cat_action) ==
|
|
305
|
+
if (Rcpp::as<std::string>(new_cat_action) == "smallest")
|
|
217
306
|
{
|
|
218
307
|
new_cat_action_C = Smallest;
|
|
219
308
|
}
|
|
220
|
-
else if (Rcpp::as<std::string>(new_cat_action) ==
|
|
309
|
+
else if (Rcpp::as<std::string>(new_cat_action) == "random")
|
|
221
310
|
{
|
|
222
311
|
new_cat_action_C = Random;
|
|
223
312
|
}
|
|
224
|
-
if (Rcpp::as<std::string>(missing_action) ==
|
|
313
|
+
if (Rcpp::as<std::string>(missing_action) == "impute")
|
|
225
314
|
{
|
|
226
315
|
missing_action_C = Impute;
|
|
227
316
|
}
|
|
228
|
-
else if (Rcpp::as<std::string>(missing_action) ==
|
|
317
|
+
else if (Rcpp::as<std::string>(missing_action) == "fail")
|
|
229
318
|
{
|
|
230
319
|
missing_action_C = Fail;
|
|
231
320
|
}
|
|
232
|
-
if (Rcpp::as<std::string>(depth_imp) ==
|
|
321
|
+
if (Rcpp::as<std::string>(depth_imp) == "lower")
|
|
233
322
|
{
|
|
234
323
|
depth_imp_C = Lower;
|
|
235
324
|
}
|
|
236
|
-
else if (Rcpp::as<std::string>(depth_imp) ==
|
|
325
|
+
else if (Rcpp::as<std::string>(depth_imp) == "same")
|
|
237
326
|
{
|
|
238
327
|
depth_imp_C = Same;
|
|
239
328
|
}
|
|
240
|
-
if (Rcpp::as<std::string>(weigh_imp_rows) ==
|
|
329
|
+
if (Rcpp::as<std::string>(weigh_imp_rows) == "prop")
|
|
241
330
|
{
|
|
242
331
|
weigh_imp_rows_C = Prop;
|
|
243
332
|
}
|
|
244
|
-
else if (Rcpp::as<std::string>(weigh_imp_rows) ==
|
|
333
|
+
else if (Rcpp::as<std::string>(weigh_imp_rows) == "flat")
|
|
245
334
|
{
|
|
246
335
|
weigh_imp_rows_C = Flat;
|
|
247
336
|
}
|
|
337
|
+
if (Rcpp::as<std::string>(scoring_metric) == "adj_depth")
|
|
338
|
+
{
|
|
339
|
+
scoring_metric_C = AdjDepth;
|
|
340
|
+
}
|
|
341
|
+
else if (Rcpp::as<std::string>(scoring_metric) == "density")
|
|
342
|
+
{
|
|
343
|
+
scoring_metric_C = Density;
|
|
344
|
+
}
|
|
345
|
+
else if (Rcpp::as<std::string>(scoring_metric) == "adj_density")
|
|
346
|
+
{
|
|
347
|
+
scoring_metric_C = AdjDensity;
|
|
348
|
+
}
|
|
349
|
+
else if (Rcpp::as<std::string>(scoring_metric) == "boxed_density")
|
|
350
|
+
{
|
|
351
|
+
scoring_metric_C = BoxedDensity;
|
|
352
|
+
}
|
|
353
|
+
else if (Rcpp::as<std::string>(scoring_metric) == "boxed_density2")
|
|
354
|
+
{
|
|
355
|
+
scoring_metric_C = BoxedDensity2;
|
|
356
|
+
}
|
|
357
|
+
else if (Rcpp::as<std::string>(scoring_metric) == "boxed_ratio")
|
|
358
|
+
{
|
|
359
|
+
scoring_metric_C = BoxedRatio;
|
|
360
|
+
}
|
|
248
361
|
|
|
249
362
|
Rcpp::NumericVector tmat = Rcpp::NumericVector();
|
|
250
363
|
Rcpp::NumericMatrix dmat = Rcpp::NumericMatrix();
|
|
@@ -255,24 +368,37 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
|
|
|
255
368
|
|
|
256
369
|
if (calc_dist)
|
|
257
370
|
{
|
|
258
|
-
tmat = Rcpp::NumericVector((nrows
|
|
259
|
-
tmat_ptr =
|
|
371
|
+
tmat = Rcpp::NumericVector(calc_ncomb(nrows));
|
|
372
|
+
tmat_ptr = REAL(tmat);
|
|
260
373
|
if (sq_dist)
|
|
261
374
|
{
|
|
262
|
-
dmat = Rcpp::NumericMatrix(nrows);
|
|
263
|
-
dmat_ptr =
|
|
375
|
+
dmat = Rcpp::NumericMatrix(nrows, nrows);
|
|
376
|
+
dmat_ptr = REAL(dmat);
|
|
264
377
|
}
|
|
265
378
|
}
|
|
266
379
|
|
|
267
380
|
if (calc_depth)
|
|
268
381
|
{
|
|
269
382
|
depths = Rcpp::NumericVector(nrows);
|
|
270
|
-
depths_ptr =
|
|
383
|
+
depths_ptr = REAL(depths);
|
|
271
384
|
}
|
|
272
385
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
386
|
+
Rcpp::List outp = Rcpp::List::create(
|
|
387
|
+
Rcpp::_["depths"] = depths,
|
|
388
|
+
Rcpp::_["tmat"] = tmat,
|
|
389
|
+
Rcpp::_["dmat"] = dmat,
|
|
390
|
+
Rcpp::_["ptr"] = R_NilValue,
|
|
391
|
+
Rcpp::_["serialized"] = R_NilValue,
|
|
392
|
+
Rcpp::_["imp_ptr"] = R_NilValue,
|
|
393
|
+
Rcpp::_["imp_ser"] = R_NilValue,
|
|
394
|
+
Rcpp::_["imputed_num"] = R_NilValue,
|
|
395
|
+
Rcpp::_["imputed_cat"] = R_NilValue,
|
|
396
|
+
Rcpp::_["err"] = Rcpp::LogicalVector::create(1)
|
|
397
|
+
);
|
|
398
|
+
|
|
399
|
+
std::unique_ptr<IsoForest> model_ptr(nullptr);
|
|
400
|
+
std::unique_ptr<ExtIsoForest> ext_model_ptr(nullptr);
|
|
401
|
+
std::unique_ptr<Imputer> imputer_ptr(nullptr);
|
|
276
402
|
|
|
277
403
|
if (ndim == 1)
|
|
278
404
|
model_ptr = std::unique_ptr<IsoForest>(new IsoForest());
|
|
@@ -282,68 +408,86 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
|
|
|
282
408
|
if (build_imputer)
|
|
283
409
|
imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
|
|
284
410
|
|
|
285
|
-
int ret_val
|
|
411
|
+
int ret_val;
|
|
412
|
+
try {
|
|
413
|
+
ret_val =
|
|
286
414
|
fit_iforest(model_ptr.get(), ext_model_ptr.get(),
|
|
287
415
|
numeric_data_ptr, ncols_numeric,
|
|
288
416
|
categ_data_ptr, ncols_categ, ncat_ptr,
|
|
289
417
|
Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
|
|
290
418
|
ndim, ntry, coef_type_C, coef_by_prop,
|
|
291
419
|
sample_weights_ptr, with_replacement, weight_as_sample,
|
|
292
|
-
nrows, sample_size, ntrees, max_depth,
|
|
293
|
-
limit_depth, penalize_range,
|
|
420
|
+
nrows, sample_size, ntrees, max_depth, ncols_per_tree,
|
|
421
|
+
limit_depth, penalize_range, standardize_data,
|
|
422
|
+
scoring_metric_C, fast_bratio,
|
|
294
423
|
standardize_dist, tmat_ptr,
|
|
295
424
|
depths_ptr, standardize_depth,
|
|
296
425
|
col_weights_ptr, weigh_by_kurt,
|
|
297
|
-
|
|
298
|
-
|
|
426
|
+
prob_pick_by_gain_pl, prob_pick_by_gain_avg,
|
|
427
|
+
prob_pick_by_full_gain, prob_pick_by_dens,
|
|
428
|
+
prob_pick_col_by_range, prob_pick_col_by_var,
|
|
429
|
+
prob_pick_col_by_kurt,
|
|
299
430
|
min_gain, missing_action_C,
|
|
300
431
|
cat_split_type_C, new_cat_action_C,
|
|
301
432
|
all_perm, imputer_ptr.get(), min_imp_obs,
|
|
302
433
|
depth_imp_C, weigh_imp_rows_C, output_imputations,
|
|
303
|
-
(uint64_t) random_seed,
|
|
434
|
+
(uint64_t) random_seed, use_long_double, nthreads);
|
|
435
|
+
}
|
|
436
|
+
catch (std::bad_alloc &e) {
|
|
437
|
+
throw_mem_err();
|
|
438
|
+
}
|
|
439
|
+
Rcpp::checkUserInterrupt();
|
|
304
440
|
|
|
305
441
|
if (ret_val == EXIT_FAILURE)
|
|
306
442
|
{
|
|
307
|
-
return Rcpp::
|
|
443
|
+
return Rcpp::unwindProtect(safe_errlist, nullptr);
|
|
308
444
|
}
|
|
309
445
|
|
|
310
446
|
if (calc_dist && sq_dist)
|
|
311
|
-
tmat_to_dense(tmat_ptr, dmat_ptr, nrows,
|
|
447
|
+
tmat_to_dense(tmat_ptr, dmat_ptr, nrows, standardize_dist? 0. : std::numeric_limits<double>::infinity());
|
|
312
448
|
|
|
313
449
|
bool serialization_failed = false;
|
|
314
450
|
Rcpp::RawVector serialized_obj;
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
451
|
+
try {
|
|
452
|
+
if (ndim == 1)
|
|
453
|
+
serialized_obj = serialize_cpp_obj(model_ptr.get());
|
|
454
|
+
else
|
|
455
|
+
serialized_obj = serialize_cpp_obj(ext_model_ptr.get());
|
|
456
|
+
}
|
|
457
|
+
catch (std::bad_alloc &e) {
|
|
458
|
+
throw_mem_err();
|
|
459
|
+
}
|
|
460
|
+
if (unlikely(!serialized_obj.size())) serialization_failed = true;
|
|
461
|
+
if (unlikely(serialization_failed)) {
|
|
321
462
|
if (ndim == 1)
|
|
322
463
|
model_ptr.reset();
|
|
323
464
|
else
|
|
324
465
|
ext_model_ptr.reset();
|
|
325
466
|
}
|
|
326
467
|
|
|
327
|
-
Rcpp::List outp = Rcpp::List::create(
|
|
328
|
-
Rcpp::_["serialized_obj"] = serialized_obj,
|
|
329
|
-
Rcpp::_["depths"] = depths,
|
|
330
|
-
Rcpp::_["tmat"] = tmat,
|
|
331
|
-
Rcpp::_["dmat"] = dmat
|
|
332
|
-
);
|
|
333
|
-
|
|
334
468
|
if (!serialization_failed)
|
|
335
469
|
{
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
470
|
+
outp["serialized"] = serialized_obj;
|
|
471
|
+
if (ndim == 1) {
|
|
472
|
+
outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get());
|
|
473
|
+
model_ptr.release();
|
|
474
|
+
}
|
|
475
|
+
else {
|
|
476
|
+
outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get());
|
|
477
|
+
ext_model_ptr.release();
|
|
478
|
+
}
|
|
340
479
|
} else
|
|
341
|
-
outp["
|
|
480
|
+
outp["ptr"] = R_NilValue;
|
|
342
481
|
|
|
343
482
|
if (build_imputer && !serialization_failed)
|
|
344
483
|
{
|
|
345
|
-
|
|
346
|
-
|
|
484
|
+
try {
|
|
485
|
+
outp["imp_ser"] = serialize_cpp_obj(imputer_ptr.get());
|
|
486
|
+
}
|
|
487
|
+
catch (std::bad_alloc &e) {
|
|
488
|
+
throw_mem_err();
|
|
489
|
+
}
|
|
490
|
+
if (!Rf_xlength(outp["imp_ser"]))
|
|
347
491
|
{
|
|
348
492
|
serialization_failed = true;
|
|
349
493
|
imputer_ptr.reset();
|
|
@@ -351,79 +495,122 @@ Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp:
|
|
|
351
495
|
model_ptr.reset();
|
|
352
496
|
else
|
|
353
497
|
ext_model_ptr.reset();
|
|
354
|
-
outp["
|
|
355
|
-
outp["
|
|
356
|
-
} else
|
|
357
|
-
outp["
|
|
498
|
+
outp["imp_ptr"] = R_NilValue;
|
|
499
|
+
outp["ptr"] = R_NilValue;
|
|
500
|
+
} else {
|
|
501
|
+
outp["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get());
|
|
502
|
+
imputer_ptr.release();
|
|
503
|
+
}
|
|
358
504
|
}
|
|
359
505
|
|
|
360
506
|
if (output_imputations && !serialization_failed)
|
|
361
507
|
{
|
|
362
|
-
outp["imputed_num"] =
|
|
508
|
+
outp["imputed_num"] = Xcpp;
|
|
363
509
|
outp["imputed_cat"] = X_cat;
|
|
364
510
|
}
|
|
365
511
|
|
|
366
|
-
outp["err"] = Rcpp::
|
|
367
|
-
|
|
512
|
+
outp["err"] = Rcpp::unwindProtect(safe_FALSE, nullptr);
|
|
368
513
|
return outp;
|
|
369
514
|
}
|
|
370
515
|
|
|
371
|
-
// [[Rcpp::export]]
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
516
|
+
// [[Rcpp::export(rng = false)]]
|
|
517
|
+
void fit_tree(SEXP model_R_ptr, Rcpp::RawVector serialized_obj, Rcpp::RawVector serialized_imputer,
|
|
518
|
+
SEXP indexer_R_ptr, Rcpp::RawVector serialized_indexer,
|
|
519
|
+
Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat,
|
|
520
|
+
Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
|
|
521
|
+
Rcpp::NumericVector sample_weights, Rcpp::NumericVector col_weights,
|
|
522
|
+
size_t nrows, size_t ncols_numeric, size_t ncols_categ,
|
|
523
|
+
size_t ndim, size_t ntry, Rcpp::CharacterVector coef_type, bool coef_by_prop,
|
|
524
|
+
size_t max_depth, size_t ncols_per_tree, bool limit_depth, bool penalize_range,
|
|
525
|
+
bool standardize_data, bool fast_bratio, bool weigh_by_kurt,
|
|
526
|
+
double prob_pick_by_gain_pl, double prob_pick_by_gain_avg,
|
|
527
|
+
double prob_pick_by_full_gain, double prob_pick_by_dens,
|
|
528
|
+
double prob_pick_col_by_range, double prob_pick_col_by_var,
|
|
529
|
+
double prob_pick_col_by_kurt, double min_gain,
|
|
530
|
+
Rcpp::CharacterVector cat_split_type, Rcpp::CharacterVector new_cat_action,
|
|
531
|
+
Rcpp::CharacterVector missing_action, bool build_imputer, size_t min_imp_obs, SEXP imp_R_ptr,
|
|
532
|
+
Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows,
|
|
533
|
+
bool all_perm,
|
|
534
|
+
Rcpp::NumericVector ref_X_num, Rcpp::IntegerVector ref_X_cat,
|
|
535
|
+
Rcpp::NumericVector ref_Xc, Rcpp::IntegerVector ref_Xc_ind, Rcpp::IntegerVector ref_Xc_indptr,
|
|
536
|
+
uint64_t random_seed, bool use_long_double,
|
|
537
|
+
Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update)
|
|
386
538
|
{
|
|
539
|
+
Rcpp::List out = Rcpp::List::create(
|
|
540
|
+
Rcpp::_["serialized"] = R_NilValue,
|
|
541
|
+
Rcpp::_["imp_ser"] = R_NilValue,
|
|
542
|
+
Rcpp::_["ind_ser"] = R_NilValue
|
|
543
|
+
);
|
|
544
|
+
|
|
545
|
+
Rcpp::IntegerVector ntrees_plus1 = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]) + 1);
|
|
546
|
+
|
|
387
547
|
double* numeric_data_ptr = NULL;
|
|
388
548
|
int* categ_data_ptr = NULL;
|
|
389
549
|
int* ncat_ptr = NULL;
|
|
390
550
|
double* Xc_ptr = NULL;
|
|
391
|
-
|
|
392
|
-
|
|
551
|
+
int* Xc_ind_ptr = NULL;
|
|
552
|
+
int* Xc_indptr_ptr = NULL;
|
|
393
553
|
double* sample_weights_ptr = NULL;
|
|
394
554
|
double* col_weights_ptr = NULL;
|
|
395
|
-
|
|
555
|
+
Rcpp::NumericVector Xcpp;
|
|
396
556
|
|
|
397
557
|
if (X_num.size())
|
|
398
558
|
{
|
|
399
|
-
numeric_data_ptr =
|
|
400
|
-
if (Rcpp::as<std::string>(missing_action) !=
|
|
559
|
+
numeric_data_ptr = REAL(X_num);
|
|
560
|
+
if (Rcpp::as<std::string>(missing_action) != "fail")
|
|
401
561
|
numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, nrows * ncols_numeric, Xcpp, 1);
|
|
402
562
|
}
|
|
403
563
|
|
|
404
564
|
if (X_cat.size())
|
|
405
565
|
{
|
|
406
|
-
categ_data_ptr =
|
|
407
|
-
ncat_ptr =
|
|
566
|
+
categ_data_ptr = INTEGER(X_cat);
|
|
567
|
+
ncat_ptr = INTEGER(ncat);
|
|
408
568
|
}
|
|
409
569
|
|
|
410
570
|
if (Xc.size())
|
|
411
571
|
{
|
|
412
|
-
Xc_ptr =
|
|
413
|
-
Xc_ind_ptr =
|
|
414
|
-
Xc_indptr_ptr =
|
|
415
|
-
if (Rcpp::as<std::string>(missing_action) !=
|
|
572
|
+
Xc_ptr = REAL(Xc);
|
|
573
|
+
Xc_ind_ptr = INTEGER(Xc_ind);
|
|
574
|
+
Xc_indptr_ptr = INTEGER(Xc_indptr);
|
|
575
|
+
if (Rcpp::as<std::string>(missing_action) != "fail")
|
|
416
576
|
Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, 1);
|
|
417
577
|
}
|
|
418
578
|
|
|
579
|
+
double* ref_numeric_data_ptr = NULL;
|
|
580
|
+
int* ref_categ_data_ptr = NULL;
|
|
581
|
+
double* ref_Xc_ptr = NULL;
|
|
582
|
+
int* ref_Xc_ind_ptr = NULL;
|
|
583
|
+
int* ref_Xc_indptr_ptr = NULL;
|
|
584
|
+
Rcpp::NumericVector ref_Xcpp;
|
|
585
|
+
if (ref_X_num.size())
|
|
586
|
+
{
|
|
587
|
+
ref_numeric_data_ptr = REAL(ref_X_num);
|
|
588
|
+
if (Rcpp::as<std::string>(missing_action) != "fail")
|
|
589
|
+
ref_numeric_data_ptr = set_R_nan_as_C_nan(ref_numeric_data_ptr, ref_X_num.size(), ref_Xcpp, 1);
|
|
590
|
+
}
|
|
591
|
+
|
|
592
|
+
if (ref_X_cat.size())
|
|
593
|
+
{
|
|
594
|
+
ref_categ_data_ptr = INTEGER(ref_X_cat);
|
|
595
|
+
}
|
|
596
|
+
|
|
597
|
+
if (ref_Xc.size())
|
|
598
|
+
{
|
|
599
|
+
ref_Xc_ptr = REAL(ref_Xc);
|
|
600
|
+
ref_Xc_ind_ptr = INTEGER(ref_Xc_ind);
|
|
601
|
+
ref_Xc_indptr_ptr = INTEGER(ref_Xc_indptr);
|
|
602
|
+
if (Rcpp::as<std::string>(missing_action) != "fail")
|
|
603
|
+
ref_Xc_ptr = set_R_nan_as_C_nan(ref_Xc_ptr, ref_Xc.size(), ref_Xcpp, 1);
|
|
604
|
+
}
|
|
605
|
+
|
|
419
606
|
if (sample_weights.size())
|
|
420
607
|
{
|
|
421
|
-
sample_weights_ptr =
|
|
608
|
+
sample_weights_ptr = REAL(sample_weights);
|
|
422
609
|
}
|
|
423
610
|
|
|
424
611
|
if (col_weights.size())
|
|
425
612
|
{
|
|
426
|
-
col_weights_ptr =
|
|
613
|
+
col_weights_ptr = REAL(col_weights);
|
|
427
614
|
}
|
|
428
615
|
|
|
429
616
|
CoefType coef_type_C = Normal;
|
|
@@ -433,62 +620,66 @@ Rcpp::RawVector fit_tree(SEXP model_R_ptr,
|
|
|
433
620
|
UseDepthImp depth_imp_C = Higher;
|
|
434
621
|
WeighImpRows weigh_imp_rows_C = Inverse;
|
|
435
622
|
|
|
436
|
-
if (Rcpp::as<std::string>(coef_type) ==
|
|
623
|
+
if (Rcpp::as<std::string>(coef_type) == "uniform")
|
|
437
624
|
{
|
|
438
625
|
coef_type_C = Uniform;
|
|
439
626
|
}
|
|
440
|
-
if (Rcpp::as<std::string>(cat_split_type) ==
|
|
627
|
+
if (Rcpp::as<std::string>(cat_split_type) == "single_categ")
|
|
441
628
|
{
|
|
442
629
|
cat_split_type_C = SingleCateg;
|
|
443
630
|
}
|
|
444
|
-
if (Rcpp::as<std::string>(new_cat_action) ==
|
|
631
|
+
if (Rcpp::as<std::string>(new_cat_action) == "smallest")
|
|
445
632
|
{
|
|
446
633
|
new_cat_action_C = Smallest;
|
|
447
634
|
}
|
|
448
|
-
else if (Rcpp::as<std::string>(new_cat_action) ==
|
|
635
|
+
else if (Rcpp::as<std::string>(new_cat_action) == "random")
|
|
449
636
|
{
|
|
450
637
|
new_cat_action_C = Random;
|
|
451
638
|
}
|
|
452
|
-
if (Rcpp::as<std::string>(missing_action) ==
|
|
639
|
+
if (Rcpp::as<std::string>(missing_action) == "impute")
|
|
453
640
|
{
|
|
454
641
|
missing_action_C = Impute;
|
|
455
642
|
}
|
|
456
|
-
else if (Rcpp::as<std::string>(missing_action) ==
|
|
643
|
+
else if (Rcpp::as<std::string>(missing_action) == "fail")
|
|
457
644
|
{
|
|
458
645
|
missing_action_C = Fail;
|
|
459
646
|
}
|
|
460
|
-
if (Rcpp::as<std::string>(depth_imp) ==
|
|
647
|
+
if (Rcpp::as<std::string>(depth_imp) == "lower")
|
|
461
648
|
{
|
|
462
649
|
depth_imp_C = Lower;
|
|
463
650
|
}
|
|
464
|
-
else if (Rcpp::as<std::string>(depth_imp) ==
|
|
651
|
+
else if (Rcpp::as<std::string>(depth_imp) == "same")
|
|
465
652
|
{
|
|
466
653
|
depth_imp_C = Same;
|
|
467
654
|
}
|
|
468
|
-
if (Rcpp::as<std::string>(weigh_imp_rows) ==
|
|
655
|
+
if (Rcpp::as<std::string>(weigh_imp_rows) == "prop")
|
|
469
656
|
{
|
|
470
657
|
weigh_imp_rows_C = Prop;
|
|
471
658
|
}
|
|
472
|
-
else if (Rcpp::as<std::string>(weigh_imp_rows) ==
|
|
659
|
+
else if (Rcpp::as<std::string>(weigh_imp_rows) == "flat")
|
|
473
660
|
{
|
|
474
661
|
weigh_imp_rows_C = Flat;
|
|
475
662
|
}
|
|
663
|
+
|
|
476
664
|
|
|
477
665
|
IsoForest* model_ptr = NULL;
|
|
478
666
|
ExtIsoForest* ext_model_ptr = NULL;
|
|
479
|
-
Imputer* imputer_ptr =
|
|
667
|
+
Imputer* imputer_ptr = NULL;
|
|
668
|
+
TreesIndexer* indexer_ptr = NULL;
|
|
480
669
|
if (ndim == 1)
|
|
481
670
|
model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
482
671
|
else
|
|
483
672
|
ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
484
673
|
|
|
485
|
-
std::vector<ImputeNode> *imp_ptr = NULL;
|
|
486
674
|
if (build_imputer)
|
|
487
|
-
{
|
|
488
675
|
imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
676
|
+
|
|
677
|
+
if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
|
|
678
|
+
indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
|
|
679
|
+
if (indexer_ptr != NULL && indexer_ptr->indices.empty())
|
|
680
|
+
indexer_ptr = NULL;
|
|
681
|
+
|
|
682
|
+
size_t old_ntrees = (ndim == 1)? (model_ptr->trees.size()) : (ext_model_ptr->hplanes.size());
|
|
492
683
|
|
|
493
684
|
add_tree(model_ptr, ext_model_ptr,
|
|
494
685
|
numeric_data_ptr, ncols_numeric,
|
|
@@ -496,24 +687,153 @@ Rcpp::RawVector fit_tree(SEXP model_R_ptr,
|
|
|
496
687
|
Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
|
|
497
688
|
ndim, ntry, coef_type_C, coef_by_prop,
|
|
498
689
|
sample_weights_ptr,
|
|
499
|
-
nrows, max_depth,
|
|
500
|
-
limit_depth, penalize_range,
|
|
690
|
+
nrows, max_depth, ncols_per_tree,
|
|
691
|
+
limit_depth, penalize_range, standardize_data, fast_bratio,
|
|
501
692
|
col_weights_ptr, weigh_by_kurt,
|
|
502
|
-
|
|
503
|
-
|
|
693
|
+
prob_pick_by_gain_pl, prob_pick_by_gain_avg,
|
|
694
|
+
prob_pick_by_full_gain, prob_pick_by_dens,
|
|
695
|
+
prob_pick_col_by_range, prob_pick_col_by_var,
|
|
696
|
+
prob_pick_col_by_kurt,
|
|
504
697
|
min_gain, missing_action_C,
|
|
505
698
|
cat_split_type_C, new_cat_action_C,
|
|
506
699
|
depth_imp_C, weigh_imp_rows_C, all_perm,
|
|
507
|
-
|
|
700
|
+
imputer_ptr, min_imp_obs,
|
|
701
|
+
indexer_ptr,
|
|
702
|
+
ref_numeric_data_ptr, ref_categ_data_ptr,
|
|
703
|
+
true, (size_t)0, (size_t)0,
|
|
704
|
+
ref_Xc_ptr, ref_Xc_ind_ptr, ref_Xc_indptr_ptr,
|
|
705
|
+
(uint64_t)random_seed, use_long_double);
|
|
706
|
+
|
|
707
|
+
Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
|
|
708
|
+
size_t new_size;
|
|
709
|
+
try
|
|
710
|
+
{
|
|
711
|
+
if (ndim == 1)
|
|
712
|
+
{
|
|
713
|
+
if (serialized_obj.size() &&
|
|
714
|
+
check_can_undergo_incremental_serialization(*model_ptr, (char*)RAW(serialized_obj)))
|
|
715
|
+
{
|
|
716
|
+
try {
|
|
717
|
+
new_size = serialized_obj.size()
|
|
718
|
+
+ determine_serialized_size_additional_trees(*model_ptr, old_ntrees);
|
|
719
|
+
new_serialized = resize_vec(serialized_obj, new_size);
|
|
720
|
+
char *temp = (char*)RAW(new_serialized);
|
|
721
|
+
incremental_serialize_isotree(*model_ptr, temp);
|
|
722
|
+
out["serialized"] = new_serialized;
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
catch (std::runtime_error &e) {
|
|
726
|
+
goto serialize_anew_singlevar;
|
|
727
|
+
}
|
|
728
|
+
}
|
|
729
|
+
|
|
730
|
+
else {
|
|
731
|
+
serialize_anew_singlevar:
|
|
732
|
+
out["serialized"] = serialize_cpp_obj(model_ptr);
|
|
733
|
+
}
|
|
734
|
+
}
|
|
508
735
|
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
736
|
+
else
|
|
737
|
+
{
|
|
738
|
+
if (serialized_obj.size() &&
|
|
739
|
+
check_can_undergo_incremental_serialization(*ext_model_ptr, (char*)RAW(serialized_obj)))
|
|
740
|
+
{
|
|
741
|
+
try {
|
|
742
|
+
new_size = serialized_obj.size()
|
|
743
|
+
+ determine_serialized_size_additional_trees(*ext_model_ptr, old_ntrees);
|
|
744
|
+
new_serialized = resize_vec(serialized_obj, new_size);
|
|
745
|
+
char *temp = (char*)RAW(new_serialized);
|
|
746
|
+
incremental_serialize_isotree(*ext_model_ptr, temp);
|
|
747
|
+
out["serialized"] = new_serialized;
|
|
748
|
+
}
|
|
749
|
+
|
|
750
|
+
catch (std::runtime_error &e) {
|
|
751
|
+
goto serialize_anew_ext;
|
|
752
|
+
}
|
|
753
|
+
}
|
|
754
|
+
|
|
755
|
+
else {
|
|
756
|
+
serialize_anew_ext:
|
|
757
|
+
out["serialized"] = serialize_cpp_obj(ext_model_ptr);
|
|
758
|
+
}
|
|
759
|
+
}
|
|
760
|
+
|
|
761
|
+
if (imputer_ptr != NULL)
|
|
762
|
+
{
|
|
763
|
+
if (serialized_imputer.size() &&
|
|
764
|
+
check_can_undergo_incremental_serialization(*imputer_ptr, (char*)RAW(serialized_imputer)))
|
|
765
|
+
{
|
|
766
|
+
try {
|
|
767
|
+
new_size = serialized_imputer.size()
|
|
768
|
+
+ determine_serialized_size_additional_trees(*imputer_ptr, old_ntrees);
|
|
769
|
+
new_imp_serialized = resize_vec(serialized_imputer, new_size);
|
|
770
|
+
char *temp = (char*)RAW(new_imp_serialized);
|
|
771
|
+
incremental_serialize_isotree(*imputer_ptr, temp);
|
|
772
|
+
out["imp_ser"] = new_imp_serialized;
|
|
773
|
+
}
|
|
774
|
+
|
|
775
|
+
catch (std::runtime_error &e) {
|
|
776
|
+
goto serialize_anew_imp;
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
|
|
780
|
+
else {
|
|
781
|
+
serialize_anew_imp:
|
|
782
|
+
out["imp_ser"] = serialize_cpp_obj(imputer_ptr);
|
|
783
|
+
}
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
if (indexer_ptr != NULL)
|
|
787
|
+
{
|
|
788
|
+
if (serialized_indexer.size() &&
|
|
789
|
+
check_can_undergo_incremental_serialization(*indexer_ptr, (char*)RAW(serialized_indexer)))
|
|
790
|
+
{
|
|
791
|
+
try {
|
|
792
|
+
new_size = serialized_indexer.size()
|
|
793
|
+
+ determine_serialized_size_additional_trees(*indexer_ptr, old_ntrees);
|
|
794
|
+
new_ind_serialized = resize_vec(serialized_indexer, new_size);
|
|
795
|
+
char *temp = (char*)RAW(new_ind_serialized);
|
|
796
|
+
incremental_serialize_isotree(*indexer_ptr, temp);
|
|
797
|
+
out["ind_ser"] = new_ind_serialized;
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
catch (std::runtime_error &e) {
|
|
801
|
+
goto serialize_anew_ind;
|
|
802
|
+
}
|
|
803
|
+
}
|
|
804
|
+
|
|
805
|
+
else {
|
|
806
|
+
serialize_anew_ind:
|
|
807
|
+
out["ind_ser"] = serialize_cpp_obj(indexer_ptr);
|
|
808
|
+
}
|
|
809
|
+
}
|
|
810
|
+
}
|
|
811
|
+
|
|
812
|
+
catch (...)
|
|
813
|
+
{
|
|
814
|
+
if (ndim == 1)
|
|
815
|
+
model_ptr->trees.resize(old_ntrees);
|
|
816
|
+
else
|
|
817
|
+
ext_model_ptr->hplanes.resize(old_ntrees);
|
|
818
|
+
if (build_imputer)
|
|
819
|
+
imputer_ptr->imputer_tree.resize(old_ntrees);
|
|
820
|
+
if (indexer_ptr != NULL)
|
|
821
|
+
indexer_ptr->indices.resize(old_ntrees);
|
|
822
|
+
throw;
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
model_cpp_obj_update["serialized"] = out["serialized"];
|
|
826
|
+
if (build_imputer)
|
|
827
|
+
model_cpp_obj_update["imp_ser"] = out["imp_ser"];
|
|
828
|
+
if (indexer_ptr != NULL)
|
|
829
|
+
model_cpp_obj_update["ind_ser"] = out["ind_ser"];
|
|
830
|
+
model_params_update["ntrees"] = ntrees_plus1;
|
|
513
831
|
}
|
|
514
832
|
|
|
515
|
-
// [[Rcpp::export]]
|
|
516
|
-
void predict_iso(SEXP model_R_ptr,
|
|
833
|
+
// [[Rcpp::export(rng = false)]]
|
|
834
|
+
void predict_iso(SEXP model_R_ptr, bool is_extended,
|
|
835
|
+
SEXP indexer_R_ptr,
|
|
836
|
+
Rcpp::NumericVector outp, Rcpp::IntegerMatrix tree_num, Rcpp::NumericMatrix tree_depths,
|
|
517
837
|
Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
|
|
518
838
|
Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
|
|
519
839
|
Rcpp::NumericVector Xr, Rcpp::IntegerVector Xr_ind, Rcpp::IntegerVector Xr_indptr,
|
|
@@ -522,48 +842,40 @@ void predict_iso(SEXP model_R_ptr, Rcpp::NumericVector outp, Rcpp::IntegerVector
|
|
|
522
842
|
double* numeric_data_ptr = NULL;
|
|
523
843
|
int* categ_data_ptr = NULL;
|
|
524
844
|
double* Xc_ptr = NULL;
|
|
525
|
-
|
|
526
|
-
|
|
845
|
+
int* Xc_ind_ptr = NULL;
|
|
846
|
+
int* Xc_indptr_ptr = NULL;
|
|
527
847
|
double* Xr_ptr = NULL;
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
std::vector<double> Xcpp;
|
|
848
|
+
int* Xr_ind_ptr = NULL;
|
|
849
|
+
int* Xr_indptr_ptr = NULL;
|
|
850
|
+
Rcpp::NumericVector Xcpp;
|
|
532
851
|
|
|
533
852
|
if (X_num.size())
|
|
534
853
|
{
|
|
535
|
-
numeric_data_ptr =
|
|
854
|
+
numeric_data_ptr = REAL(X_num);
|
|
536
855
|
}
|
|
537
856
|
|
|
538
857
|
if (X_cat.size())
|
|
539
858
|
{
|
|
540
|
-
categ_data_ptr =
|
|
859
|
+
categ_data_ptr = INTEGER(X_cat);
|
|
541
860
|
}
|
|
542
861
|
|
|
543
862
|
if (Xc_indptr.size())
|
|
544
863
|
{
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
Xc_ind_ptr = &Xc_ind[0];
|
|
549
|
-
Xc_indptr_ptr = &Xc_indptr[0];
|
|
864
|
+
Xc_ptr = REAL(Xc);
|
|
865
|
+
Xc_ind_ptr = INTEGER(Xc_ind);
|
|
866
|
+
Xc_indptr_ptr = INTEGER(Xc_indptr);
|
|
550
867
|
}
|
|
551
868
|
|
|
552
869
|
if (Xr_indptr.size())
|
|
553
870
|
{
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
Xr_ind_ptr = &Xr_ind[0];
|
|
558
|
-
Xr_indptr_ptr = &Xr_indptr[0];
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
if (tree_num.size())
|
|
562
|
-
{
|
|
563
|
-
tree_num_ptr = &tree_num[0];
|
|
871
|
+
Xr_ptr = REAL(Xr);
|
|
872
|
+
Xr_ind_ptr = INTEGER(Xr_ind);
|
|
873
|
+
Xr_indptr_ptr = INTEGER(Xr_indptr);
|
|
564
874
|
}
|
|
565
875
|
|
|
566
|
-
double*
|
|
876
|
+
double *depths_ptr = REAL(outp);
|
|
877
|
+
double *tree_depths_ptr = tree_depths.size()? REAL(tree_depths) : NULL;
|
|
878
|
+
int *tree_num_ptr = tree_num.size()? INTEGER(tree_num) : NULL;
|
|
567
879
|
|
|
568
880
|
IsoForest* model_ptr = NULL;
|
|
569
881
|
ExtIsoForest* ext_model_ptr = NULL;
|
|
@@ -571,6 +883,11 @@ void predict_iso(SEXP model_R_ptr, Rcpp::NumericVector outp, Rcpp::IntegerVector
|
|
|
571
883
|
ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
572
884
|
else
|
|
573
885
|
model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
886
|
+
TreesIndexer* indexer = NULL;
|
|
887
|
+
if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
|
|
888
|
+
indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
|
|
889
|
+
if (indexer != NULL && indexer->indices.empty())
|
|
890
|
+
indexer = NULL;
|
|
574
891
|
|
|
575
892
|
MissingAction missing_action = is_extended?
|
|
576
893
|
ext_model_ptr->missing_action
|
|
@@ -583,58 +900,75 @@ void predict_iso(SEXP model_R_ptr, Rcpp::NumericVector outp, Rcpp::IntegerVector
|
|
|
583
900
|
if (Xr.size()) Xr_ptr = set_R_nan_as_C_nan(Xr_ptr, Xr.size(), Xcpp, nthreads);
|
|
584
901
|
}
|
|
585
902
|
|
|
586
|
-
predict_iforest(numeric_data_ptr, categ_data_ptr,
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
903
|
+
predict_iforest<double, int>(numeric_data_ptr, categ_data_ptr,
|
|
904
|
+
true, (size_t)0, (size_t)0,
|
|
905
|
+
Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
|
|
906
|
+
Xr_ptr, Xr_ind_ptr, Xr_indptr_ptr,
|
|
907
|
+
nrows, nthreads, standardize,
|
|
908
|
+
model_ptr, ext_model_ptr,
|
|
909
|
+
depths_ptr, tree_num_ptr,
|
|
910
|
+
tree_depths_ptr,
|
|
911
|
+
indexer);
|
|
592
912
|
}
|
|
593
913
|
|
|
594
|
-
// [[Rcpp::export]]
|
|
595
|
-
void dist_iso(SEXP model_R_ptr,
|
|
596
|
-
Rcpp::NumericVector
|
|
914
|
+
// [[Rcpp::export(rng = false)]]
|
|
915
|
+
void dist_iso(SEXP model_R_ptr, SEXP indexer_R_ptr,
|
|
916
|
+
Rcpp::NumericVector tmat, Rcpp::NumericMatrix dmat,
|
|
917
|
+
Rcpp::NumericMatrix rmat, bool is_extended,
|
|
597
918
|
Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
|
|
598
919
|
Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
|
|
599
|
-
size_t nrows, int nthreads, bool assume_full_distr,
|
|
600
|
-
bool standardize_dist, bool sq_dist, size_t n_from
|
|
920
|
+
size_t nrows, bool use_long_double, int nthreads, bool assume_full_distr,
|
|
921
|
+
bool standardize_dist, bool sq_dist, size_t n_from,
|
|
922
|
+
bool use_reference_points, bool as_kernel)
|
|
601
923
|
{
|
|
602
924
|
double* numeric_data_ptr = NULL;
|
|
603
925
|
int* categ_data_ptr = NULL;
|
|
604
926
|
double* Xc_ptr = NULL;
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
927
|
+
int* Xc_ind_ptr = NULL;
|
|
928
|
+
int* Xc_indptr_ptr = NULL;
|
|
929
|
+
Rcpp::NumericVector Xcpp;
|
|
608
930
|
|
|
609
931
|
if (X_num.size())
|
|
610
932
|
{
|
|
611
|
-
numeric_data_ptr =
|
|
933
|
+
numeric_data_ptr = REAL(X_num);
|
|
612
934
|
}
|
|
613
935
|
|
|
614
936
|
if (X_cat.size())
|
|
615
937
|
{
|
|
616
|
-
categ_data_ptr =
|
|
938
|
+
categ_data_ptr = INTEGER(X_cat);
|
|
617
939
|
}
|
|
618
940
|
|
|
619
941
|
if (Xc_indptr.size())
|
|
620
942
|
{
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
Xc_ind_ptr = &Xc_ind[0];
|
|
625
|
-
Xc_indptr_ptr = &Xc_indptr[0];
|
|
943
|
+
Xc_ptr = REAL(Xc);
|
|
944
|
+
Xc_ind_ptr = INTEGER(Xc_ind);
|
|
945
|
+
Xc_indptr_ptr = INTEGER(Xc_indptr);
|
|
626
946
|
}
|
|
627
947
|
|
|
628
|
-
double* tmat_ptr = n_from? (double*)NULL :
|
|
629
|
-
double* dmat_ptr = (sq_dist & !n_from)?
|
|
630
|
-
double* rmat_ptr = n_from?
|
|
948
|
+
double* tmat_ptr = n_from? (double*)NULL : REAL(tmat);
|
|
949
|
+
double* dmat_ptr = (sq_dist & !n_from)? REAL(dmat) : NULL;
|
|
950
|
+
double* rmat_ptr = n_from? REAL(rmat) : NULL;
|
|
631
951
|
|
|
632
952
|
IsoForest* model_ptr = NULL;
|
|
633
953
|
ExtIsoForest* ext_model_ptr = NULL;
|
|
954
|
+
TreesIndexer* indexer = NULL;
|
|
634
955
|
if (is_extended)
|
|
635
956
|
ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
636
957
|
else
|
|
637
958
|
model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
959
|
+
if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL)
|
|
960
|
+
indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
|
|
961
|
+
if (indexer != NULL && (indexer->indices.empty() || (!as_kernel && indexer->indices.front().node_distances.empty())))
|
|
962
|
+
indexer = NULL;
|
|
963
|
+
|
|
964
|
+
if (use_reference_points && indexer != NULL && !indexer->indices.front().reference_points.empty()) {
|
|
965
|
+
tmat_ptr = NULL;
|
|
966
|
+
dmat_ptr = NULL;
|
|
967
|
+
rmat_ptr = REAL(rmat);
|
|
968
|
+
}
|
|
969
|
+
else {
|
|
970
|
+
use_reference_points = false;
|
|
971
|
+
}
|
|
638
972
|
|
|
639
973
|
|
|
640
974
|
MissingAction missing_action = is_extended?
|
|
@@ -650,43 +984,58 @@ void dist_iso(SEXP model_R_ptr, Rcpp::NumericVector tmat, Rcpp::NumericVector dm
|
|
|
650
984
|
|
|
651
985
|
calc_similarity(numeric_data_ptr, categ_data_ptr,
|
|
652
986
|
Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
|
|
653
|
-
nrows,
|
|
987
|
+
nrows, use_long_double, nthreads,
|
|
988
|
+
assume_full_distr, standardize_dist, as_kernel,
|
|
654
989
|
model_ptr, ext_model_ptr,
|
|
655
|
-
tmat_ptr, rmat_ptr, n_from
|
|
990
|
+
tmat_ptr, rmat_ptr, n_from, use_reference_points,
|
|
991
|
+
indexer, true, (size_t)0, (size_t)0);
|
|
656
992
|
|
|
657
|
-
if (
|
|
658
|
-
|
|
993
|
+
if (tmat.size() && dmat.ncol() > 0)
|
|
994
|
+
{
|
|
995
|
+
double diag_filler;
|
|
996
|
+
if (as_kernel) {
|
|
997
|
+
if (standardize_dist)
|
|
998
|
+
diag_filler = 1.;
|
|
999
|
+
else
|
|
1000
|
+
diag_filler = (model_ptr != NULL)? model_ptr->trees.size() : ext_model_ptr->hplanes.size();
|
|
1001
|
+
}
|
|
1002
|
+
else {
|
|
1003
|
+
if (standardize_dist)
|
|
1004
|
+
diag_filler = 0;
|
|
1005
|
+
else
|
|
1006
|
+
diag_filler = std::numeric_limits<double>::infinity();
|
|
1007
|
+
}
|
|
1008
|
+
tmat_to_dense(tmat_ptr, dmat_ptr, nrows, diag_filler);
|
|
1009
|
+
}
|
|
659
1010
|
}
|
|
660
1011
|
|
|
661
|
-
// [[Rcpp::export]]
|
|
1012
|
+
// [[Rcpp::export(rng = false)]]
|
|
662
1013
|
Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
|
|
663
1014
|
Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
|
|
664
1015
|
Rcpp::NumericVector Xr, Rcpp::IntegerVector Xr_ind, Rcpp::IntegerVector Xr_indptr,
|
|
665
|
-
size_t nrows, int nthreads)
|
|
1016
|
+
size_t nrows, bool use_long_double, int nthreads)
|
|
666
1017
|
{
|
|
667
1018
|
double* numeric_data_ptr = NULL;
|
|
668
1019
|
int* categ_data_ptr = NULL;
|
|
669
1020
|
double* Xr_ptr = NULL;
|
|
670
|
-
|
|
671
|
-
|
|
1021
|
+
int* Xr_ind_ptr = NULL;
|
|
1022
|
+
int* Xr_indptr_ptr = NULL;
|
|
672
1023
|
|
|
673
1024
|
if (X_num.size())
|
|
674
1025
|
{
|
|
675
|
-
numeric_data_ptr =
|
|
1026
|
+
numeric_data_ptr = REAL(X_num);
|
|
676
1027
|
}
|
|
677
1028
|
|
|
678
1029
|
if (X_cat.size())
|
|
679
1030
|
{
|
|
680
|
-
categ_data_ptr =
|
|
1031
|
+
categ_data_ptr = INTEGER(X_cat);
|
|
681
1032
|
}
|
|
682
1033
|
|
|
683
1034
|
if (Xr_indptr.size())
|
|
684
1035
|
{
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
Xr_ind_ptr = &Xr_ind[0];
|
|
689
|
-
Xr_indptr_ptr = &Xr_indptr[0];
|
|
1036
|
+
Xr_ptr = REAL(Xr);
|
|
1037
|
+
Xr_ind_ptr = INTEGER(Xr_ind);
|
|
1038
|
+
Xr_indptr_ptr = INTEGER(Xr_indptr);
|
|
690
1039
|
}
|
|
691
1040
|
|
|
692
1041
|
if (X_num.size()) numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, X_num.size(), nthreads);
|
|
@@ -702,9 +1051,9 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
|
|
|
702
1051
|
Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
|
|
703
1052
|
|
|
704
1053
|
|
|
705
|
-
impute_missing_values(numeric_data_ptr, categ_data_ptr,
|
|
1054
|
+
impute_missing_values(numeric_data_ptr, categ_data_ptr, true,
|
|
706
1055
|
Xr_ptr, Xr_ind_ptr, Xr_indptr_ptr,
|
|
707
|
-
nrows, nthreads,
|
|
1056
|
+
nrows, use_long_double, nthreads,
|
|
708
1057
|
model_ptr, ext_model_ptr,
|
|
709
1058
|
*imputer_ptr);
|
|
710
1059
|
|
|
@@ -714,7 +1063,187 @@ Rcpp::List impute_iso(SEXP model_R_ptr, SEXP imputer_R_ptr, bool is_extended,
|
|
|
714
1063
|
);
|
|
715
1064
|
}
|
|
716
1065
|
|
|
717
|
-
// [[Rcpp::export]]
|
|
1066
|
+
// [[Rcpp::export(rng = false)]]
|
|
1067
|
+
void drop_imputer(Rcpp::List lst_modify, Rcpp::List lst_modify2)
|
|
1068
|
+
{
|
|
1069
|
+
Rcpp::RawVector empty_ser = Rcpp::RawVector();
|
|
1070
|
+
Rcpp::LogicalVector FalseObj = Rcpp::LogicalVector::create(false);
|
|
1071
|
+
Rcpp::XPtr<Imputer> imp_ptr = lst_modify["imp_ptr"];
|
|
1072
|
+
imp_ptr.release();
|
|
1073
|
+
|
|
1074
|
+
lst_modify["imp_ser"] = empty_ser;
|
|
1075
|
+
lst_modify2["build_imputer"] = FalseObj;
|
|
1076
|
+
}
|
|
1077
|
+
|
|
1078
|
+
// [[Rcpp::export(rng = false)]]
|
|
1079
|
+
void drop_indexer(Rcpp::List lst_modify, Rcpp::List lst_modify2)
|
|
1080
|
+
{
|
|
1081
|
+
Rcpp::XPtr<TreesIndexer> empty_ptr = Rcpp::XPtr<TreesIndexer>(nullptr, false);
|
|
1082
|
+
Rcpp::RawVector empty_ser = Rcpp::RawVector();
|
|
1083
|
+
Rcpp::CharacterVector empty_char = Rcpp::CharacterVector();
|
|
1084
|
+
Rcpp::XPtr<TreesIndexer> indexer = lst_modify["indexer"];
|
|
1085
|
+
indexer.release();
|
|
1086
|
+
|
|
1087
|
+
lst_modify["ind_ser"] = empty_ser;
|
|
1088
|
+
lst_modify2["reference_names"] = empty_char;
|
|
1089
|
+
}
|
|
1090
|
+
|
|
1091
|
+
// [[Rcpp::export(rng = false)]]
|
|
1092
|
+
void drop_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2)
|
|
1093
|
+
{
|
|
1094
|
+
Rcpp::CharacterVector empty_char = Rcpp::CharacterVector();
|
|
1095
|
+
Rcpp::RawVector empty_ser = Rcpp::RawVector();
|
|
1096
|
+
Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
|
|
1097
|
+
TreesIndexer *indexer_ptr = indexer_R_ptr.get();
|
|
1098
|
+
if (indexer_ptr == NULL) {
|
|
1099
|
+
lst_modify["ind_ser"] = empty_ser;
|
|
1100
|
+
lst_modify2["reference_names"] = empty_char;
|
|
1101
|
+
return;
|
|
1102
|
+
}
|
|
1103
|
+
if (indexer_ptr->indices.empty()) {
|
|
1104
|
+
indexer_R_ptr.release();
|
|
1105
|
+
lst_modify["ind_ser"] = empty_ser;
|
|
1106
|
+
lst_modify2["reference_names"] = empty_char;
|
|
1107
|
+
return;
|
|
1108
|
+
}
|
|
1109
|
+
if (indexer_ptr->indices.front().reference_points.empty()) {
|
|
1110
|
+
lst_modify2["reference_names"] = empty_char;
|
|
1111
|
+
return;
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr));
|
|
1115
|
+
for (auto &tree : new_indexer->indices)
|
|
1116
|
+
{
|
|
1117
|
+
tree.reference_points.clear();
|
|
1118
|
+
tree.reference_indptr.clear();
|
|
1119
|
+
tree.reference_mapping.clear();
|
|
1120
|
+
}
|
|
1121
|
+
Rcpp::RawVector ind_ser = serialize_cpp_obj(new_indexer.get());
|
|
1122
|
+
*indexer_ptr = std::move(*new_indexer);
|
|
1123
|
+
new_indexer.release();
|
|
1124
|
+
lst_modify["ind_ser"] = ind_ser;
|
|
1125
|
+
lst_modify2["reference_names"] = empty_char;
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
// [[Rcpp::export(rng = false)]]
|
|
1129
|
+
Rcpp::List subset_trees
|
|
1130
|
+
(
|
|
1131
|
+
SEXP model_R_ptr, SEXP imputer_R_ptr, SEXP indexer_R_ptr,
|
|
1132
|
+
bool is_extended, bool has_imputer,
|
|
1133
|
+
Rcpp::IntegerVector trees_take
|
|
1134
|
+
)
|
|
1135
|
+
{
|
|
1136
|
+
bool has_indexer = !Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL;
|
|
1137
|
+
|
|
1138
|
+
Rcpp::List out = Rcpp::List::create(
|
|
1139
|
+
Rcpp::_["ptr"] = R_NilValue,
|
|
1140
|
+
Rcpp::_["serialized"] = R_NilValue,
|
|
1141
|
+
Rcpp::_["imp_ptr"] = R_NilValue,
|
|
1142
|
+
Rcpp::_["imp_ser"] = R_NilValue,
|
|
1143
|
+
Rcpp::_["indexer"] = R_NilValue,
|
|
1144
|
+
Rcpp::_["ind_ser"] = R_NilValue
|
|
1145
|
+
);
|
|
1146
|
+
|
|
1147
|
+
IsoForest* model_ptr = NULL;
|
|
1148
|
+
ExtIsoForest* ext_model_ptr = NULL;
|
|
1149
|
+
Imputer* imputer_ptr = NULL;
|
|
1150
|
+
TreesIndexer* indexer_ptr = NULL;
|
|
1151
|
+
std::unique_ptr<IsoForest> new_model_ptr(nullptr);
|
|
1152
|
+
std::unique_ptr<ExtIsoForest> new_ext_model_ptr(nullptr);
|
|
1153
|
+
std::unique_ptr<Imputer> new_imputer_ptr(nullptr);
|
|
1154
|
+
std::unique_ptr<TreesIndexer> new_indexer_ptr(nullptr);
|
|
1155
|
+
|
|
1156
|
+
if (is_extended) {
|
|
1157
|
+
ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
1158
|
+
new_ext_model_ptr = std::unique_ptr<ExtIsoForest>(new ExtIsoForest());
|
|
1159
|
+
}
|
|
1160
|
+
else {
|
|
1161
|
+
model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
1162
|
+
new_model_ptr = std::unique_ptr<IsoForest>(new IsoForest());
|
|
1163
|
+
}
|
|
1164
|
+
|
|
1165
|
+
|
|
1166
|
+
if (has_imputer) {
|
|
1167
|
+
imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr));
|
|
1168
|
+
new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer());
|
|
1169
|
+
}
|
|
1170
|
+
|
|
1171
|
+
if (has_indexer) {
|
|
1172
|
+
indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
|
|
1173
|
+
new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer());
|
|
1174
|
+
}
|
|
1175
|
+
|
|
1176
|
+
std::unique_ptr<size_t[]> trees_take_(new size_t[trees_take.size()]);
|
|
1177
|
+
for (decltype(trees_take.size()) ix = 0; ix < trees_take.size(); ix++)
|
|
1178
|
+
trees_take_[ix] = (size_t)(trees_take[ix] - 1);
|
|
1179
|
+
|
|
1180
|
+
subset_model(model_ptr, new_model_ptr.get(),
|
|
1181
|
+
ext_model_ptr, new_ext_model_ptr.get(),
|
|
1182
|
+
imputer_ptr, new_imputer_ptr.get(),
|
|
1183
|
+
indexer_ptr, new_indexer_ptr.get(),
|
|
1184
|
+
trees_take_.get(), trees_take.size());
|
|
1185
|
+
trees_take_.reset();
|
|
1186
|
+
|
|
1187
|
+
if (!is_extended)
|
|
1188
|
+
out["serialized"] = serialize_cpp_obj(new_model_ptr.get());
|
|
1189
|
+
else
|
|
1190
|
+
out["serialized"] = serialize_cpp_obj(new_ext_model_ptr.get());
|
|
1191
|
+
if (has_imputer)
|
|
1192
|
+
out["imp_ser"] = serialize_cpp_obj(new_imputer_ptr.get());
|
|
1193
|
+
if (has_indexer)
|
|
1194
|
+
out["ind_ser"] = serialize_cpp_obj(new_indexer_ptr.get());
|
|
1195
|
+
|
|
1196
|
+
if (!is_extended) {
|
|
1197
|
+
out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get());
|
|
1198
|
+
new_model_ptr.release();
|
|
1199
|
+
}
|
|
1200
|
+
else {
|
|
1201
|
+
out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get());
|
|
1202
|
+
new_ext_model_ptr.release();
|
|
1203
|
+
}
|
|
1204
|
+
if (has_imputer) {
|
|
1205
|
+
out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get());
|
|
1206
|
+
new_imputer_ptr.release();
|
|
1207
|
+
}
|
|
1208
|
+
if (has_indexer) {
|
|
1209
|
+
out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get());
|
|
1210
|
+
new_indexer_ptr.release();
|
|
1211
|
+
}
|
|
1212
|
+
return out;
|
|
1213
|
+
}
|
|
1214
|
+
|
|
1215
|
+
// [[Rcpp::export(rng = false)]]
|
|
1216
|
+
void inplace_set_to_zero(SEXP obj)
|
|
1217
|
+
{
|
|
1218
|
+
auto obj_type = TYPEOF(obj);
|
|
1219
|
+
switch(obj_type)
|
|
1220
|
+
{
|
|
1221
|
+
case REALSXP:
|
|
1222
|
+
{
|
|
1223
|
+
REAL(obj)[0] = 0;
|
|
1224
|
+
break;
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
case INTSXP:
|
|
1228
|
+
{
|
|
1229
|
+
INTEGER(obj)[0] = 0;
|
|
1230
|
+
break;
|
|
1231
|
+
}
|
|
1232
|
+
|
|
1233
|
+
case LGLSXP:
|
|
1234
|
+
{
|
|
1235
|
+
LOGICAL(obj)[0] = 0;
|
|
1236
|
+
break;
|
|
1237
|
+
}
|
|
1238
|
+
|
|
1239
|
+
default:
|
|
1240
|
+
{
|
|
1241
|
+
Rcpp::stop("Model object has incorrect structure.\n");
|
|
1242
|
+
}
|
|
1243
|
+
}
|
|
1244
|
+
}
|
|
1245
|
+
|
|
1246
|
+
// [[Rcpp::export(rng = false)]]
|
|
718
1247
|
Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
|
|
719
1248
|
{
|
|
720
1249
|
size_t ntrees;
|
|
@@ -734,9 +1263,9 @@ Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
|
|
|
734
1263
|
Rcpp::IntegerVector n_nodes(ntrees);
|
|
735
1264
|
Rcpp::IntegerVector n_terminal(ntrees);
|
|
736
1265
|
if (is_extended)
|
|
737
|
-
get_num_nodes(*ext_model_ptr,
|
|
1266
|
+
get_num_nodes(*ext_model_ptr, INTEGER(n_nodes), INTEGER(n_terminal), nthreads);
|
|
738
1267
|
else
|
|
739
|
-
get_num_nodes(*model_ptr,
|
|
1268
|
+
get_num_nodes(*model_ptr, INTEGER(n_nodes), INTEGER(n_terminal), nthreads);
|
|
740
1269
|
|
|
741
1270
|
return Rcpp::List::create(
|
|
742
1271
|
Rcpp::_["total"] = n_nodes,
|
|
@@ -744,25 +1273,56 @@ Rcpp::List get_n_nodes(SEXP model_R_ptr, bool is_extended, int nthreads)
|
|
|
744
1273
|
);
|
|
745
1274
|
}
|
|
746
1275
|
|
|
747
|
-
// [[Rcpp::export]]
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
1276
|
+
// [[Rcpp::export(rng = false)]]
|
|
1277
|
+
void append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
|
|
1278
|
+
SEXP imp_R_ptr, SEXP oimp_R_ptr,
|
|
1279
|
+
SEXP ind_R_ptr, SEXP oind_R_ptr,
|
|
1280
|
+
bool is_extended,
|
|
1281
|
+
Rcpp::RawVector serialized_obj,
|
|
1282
|
+
Rcpp::RawVector serialized_imputer,
|
|
1283
|
+
Rcpp::RawVector serialized_indexer,
|
|
1284
|
+
Rcpp::List &model_cpp_obj_update,
|
|
1285
|
+
Rcpp::List &model_params_update)
|
|
751
1286
|
{
|
|
752
|
-
|
|
1287
|
+
if ((!Rf_isNull(imp_R_ptr) && R_ExternalPtrAddr(imp_R_ptr) != NULL)
|
|
1288
|
+
&&
|
|
1289
|
+
!(!Rf_isNull(oimp_R_ptr) && R_ExternalPtrAddr(oimp_R_ptr) != NULL))
|
|
1290
|
+
{
|
|
1291
|
+
Rcpp::stop("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n");
|
|
1292
|
+
}
|
|
1293
|
+
if ((!Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL)
|
|
1294
|
+
&&
|
|
1295
|
+
!(!Rf_isNull(oind_R_ptr) && R_ExternalPtrAddr(oind_R_ptr) != NULL))
|
|
1296
|
+
{
|
|
1297
|
+
Rcpp::stop("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n");
|
|
1298
|
+
}
|
|
1299
|
+
|
|
1300
|
+
Rcpp::List out = Rcpp::List::create(
|
|
1301
|
+
Rcpp::_["serialized"] = R_NilValue,
|
|
1302
|
+
Rcpp::_["imp_ser"] = R_NilValue,
|
|
1303
|
+
Rcpp::_["ind_ser"] = R_NilValue
|
|
1304
|
+
);
|
|
1305
|
+
|
|
1306
|
+
Rcpp::IntegerVector ntrees_new = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]));
|
|
1307
|
+
|
|
753
1308
|
IsoForest* model_ptr = NULL;
|
|
754
1309
|
IsoForest* other_ptr = NULL;
|
|
755
1310
|
ExtIsoForest* ext_model_ptr = NULL;
|
|
756
1311
|
ExtIsoForest* ext_other_ptr = NULL;
|
|
757
1312
|
Imputer* imputer_ptr = NULL;
|
|
758
1313
|
Imputer* oimputer_ptr = NULL;
|
|
1314
|
+
TreesIndexer* indexer_ptr = NULL;
|
|
1315
|
+
TreesIndexer* oindexer_ptr = NULL;
|
|
1316
|
+
size_t old_ntrees;
|
|
759
1317
|
|
|
760
1318
|
if (is_extended) {
|
|
761
1319
|
ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
762
1320
|
ext_other_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(other_R_ptr));
|
|
1321
|
+
old_ntrees = ext_model_ptr->hplanes.size();
|
|
763
1322
|
} else {
|
|
764
1323
|
model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
765
1324
|
other_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(other_R_ptr));
|
|
1325
|
+
old_ntrees = model_ptr->trees.size();
|
|
766
1326
|
}
|
|
767
1327
|
|
|
768
1328
|
if (!Rf_isNull(imp_R_ptr) && !Rf_isNull(oimp_R_ptr) &&
|
|
@@ -773,23 +1333,158 @@ Rcpp::List append_trees_from_other(SEXP model_R_ptr, SEXP other_R_ptr,
|
|
|
773
1333
|
oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr));
|
|
774
1334
|
}
|
|
775
1335
|
|
|
1336
|
+
if (!Rf_isNull(ind_R_ptr) && !Rf_isNull(oind_R_ptr) &&
|
|
1337
|
+
R_ExternalPtrAddr(ind_R_ptr) != NULL &&
|
|
1338
|
+
R_ExternalPtrAddr(oind_R_ptr) != NULL)
|
|
1339
|
+
{
|
|
1340
|
+
indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
|
|
1341
|
+
oindexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(oind_R_ptr));
|
|
1342
|
+
}
|
|
1343
|
+
|
|
776
1344
|
merge_models(model_ptr, other_ptr,
|
|
777
1345
|
ext_model_ptr, ext_other_ptr,
|
|
778
|
-
imputer_ptr, oimputer_ptr
|
|
1346
|
+
imputer_ptr, oimputer_ptr,
|
|
1347
|
+
indexer_ptr, oindexer_ptr);
|
|
779
1348
|
|
|
1349
|
+
Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized;
|
|
1350
|
+
size_t new_size;
|
|
1351
|
+
try
|
|
1352
|
+
{
|
|
1353
|
+
if (!is_extended)
|
|
1354
|
+
{
|
|
1355
|
+
if (serialized_obj.size() &&
|
|
1356
|
+
check_can_undergo_incremental_serialization(*model_ptr, (char*)RAW(serialized_obj)))
|
|
1357
|
+
{
|
|
1358
|
+
try {
|
|
1359
|
+
new_size = serialized_obj.size()
|
|
1360
|
+
+ determine_serialized_size_additional_trees(*model_ptr, old_ntrees);
|
|
1361
|
+
new_serialized = resize_vec(serialized_obj, new_size);
|
|
1362
|
+
char *temp = (char*)RAW(new_serialized);
|
|
1363
|
+
incremental_serialize_isotree(*model_ptr, temp);
|
|
1364
|
+
out["serialized"] = new_serialized;
|
|
1365
|
+
}
|
|
1366
|
+
|
|
1367
|
+
catch (std::runtime_error &e) {
|
|
1368
|
+
goto serialize_anew_singlevar;
|
|
1369
|
+
}
|
|
1370
|
+
}
|
|
1371
|
+
|
|
1372
|
+
else {
|
|
1373
|
+
serialize_anew_singlevar:
|
|
1374
|
+
out["serialized"] = serialize_cpp_obj(model_ptr);
|
|
1375
|
+
}
|
|
1376
|
+
}
|
|
780
1377
|
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
1378
|
+
else
|
|
1379
|
+
{
|
|
1380
|
+
if (serialized_obj.size() &&
|
|
1381
|
+
check_can_undergo_incremental_serialization(*ext_model_ptr, (char*)RAW(serialized_obj)))
|
|
1382
|
+
{
|
|
1383
|
+
try {
|
|
1384
|
+
new_size = serialized_obj.size()
|
|
1385
|
+
+ determine_serialized_size_additional_trees(*ext_model_ptr, old_ntrees);
|
|
1386
|
+
new_serialized = resize_vec(serialized_obj, new_size);
|
|
1387
|
+
char *temp = (char*)RAW(new_serialized);
|
|
1388
|
+
incremental_serialize_isotree(*ext_model_ptr, temp);
|
|
1389
|
+
out["serialized"] = new_serialized;
|
|
1390
|
+
}
|
|
1391
|
+
|
|
1392
|
+
catch (std::runtime_error &e) {
|
|
1393
|
+
goto serialize_anew_ext;
|
|
1394
|
+
}
|
|
1395
|
+
}
|
|
1396
|
+
|
|
1397
|
+
else {
|
|
1398
|
+
serialize_anew_ext:
|
|
1399
|
+
out["serialized"] = serialize_cpp_obj(ext_model_ptr);
|
|
1400
|
+
}
|
|
1401
|
+
}
|
|
785
1402
|
|
|
786
|
-
|
|
787
|
-
|
|
1403
|
+
if (imputer_ptr != NULL)
|
|
1404
|
+
{
|
|
1405
|
+
if (serialized_imputer.size() &&
|
|
1406
|
+
check_can_undergo_incremental_serialization(*imputer_ptr, (char*)RAW(serialized_imputer)))
|
|
1407
|
+
{
|
|
1408
|
+
try {
|
|
1409
|
+
new_size = serialized_obj.size()
|
|
1410
|
+
+ determine_serialized_size_additional_trees(*imputer_ptr, old_ntrees);
|
|
1411
|
+
new_imp_serialized = resize_vec(serialized_imputer, new_size);
|
|
1412
|
+
char *temp = (char*)RAW(new_imp_serialized);
|
|
1413
|
+
incremental_serialize_isotree(*imputer_ptr, temp);
|
|
1414
|
+
out["imp_ser"] = new_imp_serialized;
|
|
1415
|
+
}
|
|
1416
|
+
|
|
1417
|
+
catch (std::runtime_error &e) {
|
|
1418
|
+
goto serialize_anew_imp;
|
|
1419
|
+
}
|
|
1420
|
+
}
|
|
1421
|
+
|
|
1422
|
+
else {
|
|
1423
|
+
serialize_anew_imp:
|
|
1424
|
+
out["imp_ser"] = serialize_cpp_obj(imputer_ptr);
|
|
1425
|
+
}
|
|
1426
|
+
}
|
|
788
1427
|
|
|
789
|
-
|
|
1428
|
+
if (indexer_ptr != NULL)
|
|
1429
|
+
{
|
|
1430
|
+
if (serialized_indexer.size() &&
|
|
1431
|
+
check_can_undergo_incremental_serialization(*indexer_ptr, (char*)RAW(serialized_indexer)))
|
|
1432
|
+
{
|
|
1433
|
+
try {
|
|
1434
|
+
new_size = serialized_obj.size()
|
|
1435
|
+
+ determine_serialized_size_additional_trees(*indexer_ptr, old_ntrees);
|
|
1436
|
+
new_ind_serialized = resize_vec(serialized_indexer, new_size);
|
|
1437
|
+
char *temp = (char*)RAW(new_ind_serialized);
|
|
1438
|
+
incremental_serialize_isotree(*indexer_ptr, temp);
|
|
1439
|
+
out["ind_ser"] = new_ind_serialized;
|
|
1440
|
+
}
|
|
1441
|
+
|
|
1442
|
+
catch (std::runtime_error &e) {
|
|
1443
|
+
goto serialize_anew_ind;
|
|
1444
|
+
}
|
|
1445
|
+
}
|
|
1446
|
+
|
|
1447
|
+
else {
|
|
1448
|
+
serialize_anew_ind:
|
|
1449
|
+
out["ind_ser"] = serialize_cpp_obj(indexer_ptr);
|
|
1450
|
+
}
|
|
1451
|
+
}
|
|
1452
|
+
}
|
|
1453
|
+
|
|
1454
|
+
catch (...)
|
|
1455
|
+
{
|
|
1456
|
+
if (!is_extended)
|
|
1457
|
+
model_ptr->trees.resize(old_ntrees);
|
|
1458
|
+
else
|
|
1459
|
+
ext_model_ptr->hplanes.resize(old_ntrees);
|
|
1460
|
+
|
|
1461
|
+
if (imputer_ptr != NULL)
|
|
1462
|
+
imputer_ptr->imputer_tree.resize(old_ntrees);
|
|
1463
|
+
if (indexer_ptr != NULL)
|
|
1464
|
+
indexer_ptr->indices.resize(old_ntrees);
|
|
1465
|
+
throw;
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1468
|
+
model_cpp_obj_update["serialized"] = out["serialized"];
|
|
1469
|
+
if (imputer_ptr)
|
|
1470
|
+
model_cpp_obj_update["imp_ser"] = out["imp_ser"];
|
|
1471
|
+
if (indexer_ptr)
|
|
1472
|
+
model_cpp_obj_update["ind_ser"] = out["ind_ser"];
|
|
1473
|
+
*(INTEGER(ntrees_new)) = is_extended? ext_model_ptr->hplanes.size() : model_ptr->trees.size();
|
|
1474
|
+
model_params_update["ntrees"] = ntrees_new;
|
|
790
1475
|
}
|
|
791
1476
|
|
|
792
|
-
|
|
1477
|
+
/* Allocates a new R list.
   'data' must point to a 'size_t' holding the number of elements to allocate.
   The 'void*' -> SEXP signature lets this be passed as a callback to
   'Rcpp::unwindProtect'. */
SEXP alloc_List(void *data)
{
    const size_t n_elements = *static_cast<size_t*>(data);
    Rcpp::List allocated(n_elements);
    return allocated;
}
|
|
1481
|
+
|
|
1482
|
+
/* Builds an R character vector from a C++ string.
   'data' must point to a 'std::string'.
   The 'void*' -> SEXP signature lets this be passed as a callback to
   'Rcpp::unwindProtect'. */
SEXP safe_CastString(void *data)
{
    const std::string &text = *static_cast<std::string*>(data);
    Rcpp::CharacterVector converted(text);
    return converted;
}
|
|
1486
|
+
|
|
1487
|
+
// [[Rcpp::export(rng = false)]]
|
|
793
1488
|
Rcpp::ListOf<Rcpp::CharacterVector> model_to_sql(SEXP model_R_ptr, bool is_extended,
|
|
794
1489
|
Rcpp::CharacterVector numeric_colanmes,
|
|
795
1490
|
Rcpp::CharacterVector categ_colnames,
|
|
@@ -814,13 +1509,16 @@ Rcpp::ListOf<Rcpp::CharacterVector> model_to_sql(SEXP model_R_ptr, bool is_exten
|
|
|
814
1509
|
categ_levels_cpp,
|
|
815
1510
|
output_tree_num, true, single_tree, tree_num,
|
|
816
1511
|
nthreads);
|
|
817
|
-
|
|
1512
|
+
/* TODO: this function could create objects through the ALTREP system instead.
|
|
1513
|
+
That way, it would avoid an extra copy of the data */
|
|
1514
|
+
size_t sz = res.size();
|
|
1515
|
+
Rcpp::List out = Rcpp::unwindProtect(alloc_List, (void*)&sz);
|
|
818
1516
|
for (size_t ix = 0; ix < res.size(); ix++)
|
|
819
|
-
out[ix] = Rcpp::
|
|
1517
|
+
out[ix] = Rcpp::unwindProtect(safe_CastString, &(res[ix]));
|
|
820
1518
|
return out;
|
|
821
1519
|
}
|
|
822
1520
|
|
|
823
|
-
// [[Rcpp::export]]
|
|
1521
|
+
// [[Rcpp::export(rng = false)]]
|
|
824
1522
|
Rcpp::CharacterVector model_to_sql_with_select_from(SEXP model_R_ptr, bool is_extended,
|
|
825
1523
|
Rcpp::CharacterVector numeric_colanmes,
|
|
826
1524
|
Rcpp::CharacterVector categ_colnames,
|
|
@@ -842,11 +1540,976 @@ Rcpp::CharacterVector model_to_sql_with_select_from(SEXP model_R_ptr, bool is_ex
|
|
|
842
1540
|
std::string table_from_cpp = Rcpp::as<std::string>(table_from);
|
|
843
1541
|
std::string select_as_cpp = Rcpp::as<std::string>(select_as);
|
|
844
1542
|
|
|
845
|
-
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
|
|
1543
|
+
std::string out = generate_sql_with_select_from(model_ptr, ext_model_ptr,
|
|
1544
|
+
table_from_cpp, select_as_cpp,
|
|
1545
|
+
numeric_colanmes_cpp, categ_colanmes_cpp,
|
|
1546
|
+
categ_levels_cpp,
|
|
1547
|
+
true, nthreads);
|
|
1548
|
+
/* TODO: this function could create objects through the ALTREP system instead.
|
|
1549
|
+
That way, it would avoid an extra copy of the data */
|
|
1550
|
+
return Rcpp::unwindProtect(safe_CastString, &out);
|
|
1551
|
+
}
|
|
1552
|
+
|
|
1553
|
+
// [[Rcpp::export(rng = false)]]
|
|
1554
|
+
Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, bool has_imputer, SEXP ind_R_ptr)
|
|
1555
|
+
{
|
|
1556
|
+
bool has_indexer = !Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL;
|
|
1557
|
+
|
|
1558
|
+
Rcpp::List out = Rcpp::List::create(
|
|
1559
|
+
Rcpp::_["ptr"] = R_NilValue,
|
|
1560
|
+
Rcpp::_["imp_ptr"] = R_NilValue,
|
|
1561
|
+
Rcpp::_["indexer"] = R_NilValue
|
|
1562
|
+
);
|
|
1563
|
+
|
|
1564
|
+
IsoForest* model_ptr = NULL;
|
|
1565
|
+
ExtIsoForest* ext_model_ptr = NULL;
|
|
1566
|
+
Imputer* imputer_ptr = NULL;
|
|
1567
|
+
TreesIndexer* indexer_ptr = NULL;
|
|
1568
|
+
if (is_extended)
|
|
1569
|
+
ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
1570
|
+
else
|
|
1571
|
+
model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
1572
|
+
if (has_imputer)
|
|
1573
|
+
imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr));
|
|
1574
|
+
if (has_indexer)
|
|
1575
|
+
indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr));
|
|
1576
|
+
|
|
1577
|
+
std::unique_ptr<IsoForest> copy_model(new IsoForest());
|
|
1578
|
+
std::unique_ptr<ExtIsoForest> copy_ext_model(new ExtIsoForest());
|
|
1579
|
+
std::unique_ptr<Imputer> copy_imputer(new Imputer());
|
|
1580
|
+
std::unique_ptr<TreesIndexer> copy_indexer(new TreesIndexer());
|
|
1581
|
+
|
|
1582
|
+
if (model_ptr != NULL)
|
|
1583
|
+
*copy_model = *model_ptr;
|
|
1584
|
+
if (ext_model_ptr != NULL)
|
|
1585
|
+
*copy_ext_model = *ext_model_ptr;
|
|
1586
|
+
if (imputer_ptr != NULL)
|
|
1587
|
+
*copy_imputer = *imputer_ptr;
|
|
1588
|
+
if (indexer_ptr != NULL)
|
|
1589
|
+
*copy_indexer = *indexer_ptr;
|
|
1590
|
+
|
|
1591
|
+
if (is_extended) {
|
|
1592
|
+
out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get());
|
|
1593
|
+
copy_ext_model.release();
|
|
1594
|
+
}
|
|
1595
|
+
else {
|
|
1596
|
+
out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get());
|
|
1597
|
+
copy_model.release();
|
|
1598
|
+
}
|
|
1599
|
+
if (has_imputer) {
|
|
1600
|
+
out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get());
|
|
1601
|
+
copy_imputer.release();
|
|
1602
|
+
}
|
|
1603
|
+
if (has_indexer) {
|
|
1604
|
+
out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get());
|
|
1605
|
+
copy_indexer.release();
|
|
1606
|
+
}
|
|
1607
|
+
return out;
|
|
1608
|
+
}
|
|
1609
|
+
|
|
1610
|
+
// [[Rcpp::export(rng = false)]]
|
|
1611
|
+
void build_tree_indices(Rcpp::List lst_modify, bool is_extended, bool with_distances, int nthreads)
|
|
1612
|
+
{
|
|
1613
|
+
Rcpp::RawVector ind_ser = Rcpp::RawVector();
|
|
1614
|
+
Rcpp::List empty_lst = Rcpp::List::create(Rcpp::_["indexer"] = R_NilValue);
|
|
1615
|
+
std::unique_ptr<TreesIndexer> indexer(new TreesIndexer());
|
|
1616
|
+
|
|
1617
|
+
if (!is_extended) {
|
|
1618
|
+
build_tree_indices(*indexer,
|
|
1619
|
+
*static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])),
|
|
1620
|
+
nthreads,
|
|
1621
|
+
with_distances);
|
|
1622
|
+
}
|
|
1623
|
+
else {
|
|
1624
|
+
build_tree_indices(*indexer,
|
|
1625
|
+
*static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])),
|
|
1626
|
+
nthreads,
|
|
1627
|
+
with_distances);
|
|
1628
|
+
}
|
|
1629
|
+
|
|
1630
|
+
ind_ser = serialize_cpp_obj(indexer.get());
|
|
1631
|
+
empty_lst["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
|
|
1632
|
+
if (!Rf_isNull(lst_modify["indexer"])) {
|
|
1633
|
+
Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
|
|
1634
|
+
indexer_R_ptr.release();
|
|
1635
|
+
}
|
|
1636
|
+
|
|
1637
|
+
lst_modify["ind_ser"] = ind_ser;
|
|
1638
|
+
lst_modify["indexer"] = empty_lst["indexer"];
|
|
1639
|
+
indexer.release();
|
|
1640
|
+
}
|
|
1641
|
+
|
|
1642
|
+
// [[Rcpp::export(rng = false)]]
|
|
1643
|
+
bool check_node_indexer_has_distances(SEXP indexer_R_ptr)
|
|
1644
|
+
{
|
|
1645
|
+
if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL)
|
|
1646
|
+
return false;
|
|
1647
|
+
TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
|
|
1648
|
+
if (indexer->indices.empty()) return false;
|
|
1649
|
+
return !indexer->indices.front().node_distances.empty();
|
|
1650
|
+
}
|
|
1651
|
+
|
|
1652
|
+
// [[Rcpp::export(rng = false)]]
|
|
1653
|
+
void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rnames, bool is_extended,
|
|
1654
|
+
Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat,
|
|
1655
|
+
Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr,
|
|
1656
|
+
size_t nrows, int nthreads, bool with_distances)
|
|
1657
|
+
{
|
|
1658
|
+
Rcpp::RawVector ind_ser = Rcpp::RawVector();
|
|
1659
|
+
Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"];
|
|
1660
|
+
|
|
1661
|
+
double* numeric_data_ptr = NULL;
|
|
1662
|
+
int* categ_data_ptr = NULL;
|
|
1663
|
+
double* Xc_ptr = NULL;
|
|
1664
|
+
int* Xc_ind_ptr = NULL;
|
|
1665
|
+
int* Xc_indptr_ptr = NULL;
|
|
1666
|
+
Rcpp::NumericVector Xcpp;
|
|
1667
|
+
|
|
1668
|
+
if (X_num.size())
|
|
1669
|
+
{
|
|
1670
|
+
numeric_data_ptr = REAL(X_num);
|
|
1671
|
+
}
|
|
1672
|
+
|
|
1673
|
+
if (X_cat.size())
|
|
1674
|
+
{
|
|
1675
|
+
categ_data_ptr = INTEGER(X_cat);
|
|
1676
|
+
}
|
|
1677
|
+
|
|
1678
|
+
if (Xc_indptr.size())
|
|
1679
|
+
{
|
|
1680
|
+
Xc_ptr = REAL(Xc);
|
|
1681
|
+
Xc_ind_ptr = INTEGER(Xc_ind);
|
|
1682
|
+
Xc_indptr_ptr = INTEGER(Xc_indptr);
|
|
1683
|
+
}
|
|
1684
|
+
|
|
1685
|
+
IsoForest* model_ptr = NULL;
|
|
1686
|
+
ExtIsoForest* ext_model_ptr = NULL;
|
|
1687
|
+
TreesIndexer* indexer = NULL;
|
|
1688
|
+
if (is_extended)
|
|
1689
|
+
ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"]));
|
|
1690
|
+
else
|
|
1691
|
+
model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"]));
|
|
1692
|
+
indexer = indexer_R_ptr.get();
|
|
1693
|
+
|
|
1694
|
+
MissingAction missing_action = is_extended?
|
|
1695
|
+
ext_model_ptr->missing_action
|
|
1696
|
+
:
|
|
1697
|
+
model_ptr->missing_action;
|
|
1698
|
+
if (missing_action != Fail)
|
|
1699
|
+
{
|
|
1700
|
+
if (X_num.size()) numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, X_num.size(), Xcpp, nthreads);
|
|
1701
|
+
if (Xc.size()) Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, nthreads);
|
|
1702
|
+
}
|
|
1703
|
+
|
|
1704
|
+
std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer));
|
|
1705
|
+
|
|
1706
|
+
set_reference_points(model_ptr, ext_model_ptr, new_indexer.get(),
|
|
1707
|
+
with_distances,
|
|
1708
|
+
numeric_data_ptr, categ_data_ptr,
|
|
1709
|
+
true, (size_t)0, (size_t)0,
|
|
1710
|
+
Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr,
|
|
1711
|
+
(double*)NULL, (int*)NULL, (int*)NULL,
|
|
1712
|
+
nrows, nthreads);
|
|
1713
|
+
|
|
1714
|
+
ind_ser = serialize_cpp_obj(new_indexer.get());
|
|
1715
|
+
*indexer = std::move(*new_indexer);
|
|
1716
|
+
new_indexer.release();
|
|
1717
|
+
lst_modify["ind_ser"] = ind_ser;
|
|
1718
|
+
lst_modify2["reference_names"] = rnames;
|
|
1719
|
+
}
|
|
1720
|
+
|
|
1721
|
+
// [[Rcpp::export(rng = false)]]
|
|
1722
|
+
bool check_node_indexer_has_references(SEXP indexer_R_ptr)
|
|
1723
|
+
{
|
|
1724
|
+
if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL)
|
|
1725
|
+
return false;
|
|
1726
|
+
TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
|
|
1727
|
+
if (indexer->indices.empty())
|
|
1728
|
+
return false;
|
|
1729
|
+
if (indexer->indices.front().reference_points.empty())
|
|
1730
|
+
return false;
|
|
1731
|
+
else
|
|
1732
|
+
return true;
|
|
1733
|
+
}
|
|
1734
|
+
|
|
1735
|
+
// [[Rcpp::export(rng = false)]]
|
|
1736
|
+
int get_num_references(SEXP indexer_R_ptr)
|
|
1737
|
+
{
|
|
1738
|
+
TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr));
|
|
1739
|
+
if (indexer == NULL || indexer->indices.empty()) return 0;
|
|
1740
|
+
return indexer->indices.front().reference_points.size();
|
|
1741
|
+
}
|
|
1742
|
+
|
|
1743
|
+
// [[Rcpp::export(rng = false)]]
|
|
1744
|
+
SEXP get_null_R_pointer()
|
|
1745
|
+
{
|
|
1746
|
+
return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue);
|
|
1747
|
+
}
|
|
1748
|
+
|
|
1749
|
+
/* This library will use different code paths for opening a file path
|
|
1750
|
+
in order to support non-ASCII characters, depending on compiler and
|
|
1751
|
+
platform support. */
|
|
1752
|
+
#if (defined(_WIN32) || defined(_WIN64))
|
|
1753
|
+
# if defined(__GNUC__) && (__GNUC__ >= 5)
|
|
1754
|
+
# define USE_CODECVT
|
|
1755
|
+
# define TAKE_AS_UTF8 true
|
|
1756
|
+
# elif !defined(_FOR_CRAN)
|
|
1757
|
+
# define USE_RC_FOPEN
|
|
1758
|
+
# define TAKE_AS_UTF8 false
|
|
1759
|
+
# else
|
|
1760
|
+
# define USE_SIMPLE_FOPEN
|
|
1761
|
+
# define TAKE_AS_UTF8 false
|
|
1762
|
+
# endif
|
|
1763
|
+
#else
|
|
1764
|
+
# define USE_SIMPLE_FOPEN
|
|
1765
|
+
# define TAKE_AS_UTF8 false
|
|
1766
|
+
#endif
|
|
1767
|
+
|
|
1768
|
+
/* Now the actual implementations */
|
|
1769
|
+
#ifdef USE_CODECVT
|
|
1770
|
+
/* https://stackoverflow.com/questions/2573834/c-convert-string-or-char-to-wstring-or-wchar-t */
|
|
1771
|
+
/* */
|
|
1772
|
+
#include <locale>
|
|
1773
|
+
#include <codecvt>
|
|
1774
|
+
#include <string>
|
|
1775
|
+
/* Variant of 'R_fopen' compiled when USE_CODECVT is defined.
   Takes the first element of 'fname' as UTF-8, converts both the path and the
   mode string to wide strings, and opens the file through '_wfopen'.
   Returns the resulting handle as given by '_wfopen'. */
FILE* R_fopen(Rcpp::CharacterVector fname, const char *mode)
{
    Rcpp::String path_utf8(fname[0], CE_UTF8);
    std::wstring_convert<std::codecvt_utf8_utf16<wchar_t>> utf8_to_wide;

    const std::wstring wide_path = utf8_to_wide.from_bytes(path_utf8.get_cstring());
    const std::wstring wide_mode = utf8_to_wide.from_bytes(std::string(mode));

    return _wfopen(wide_path.c_str(), wide_mode.c_str());
}
|
|
1784
|
+
#endif
|
|
1785
|
+
|
|
1786
|
+
#ifdef USE_RC_FOPEN
|
|
1787
|
+
extern "C" {
|
|
1788
|
+
FILE *RC_fopen(const SEXP fn, const char *mode, const Rboolean expand);
|
|
1789
|
+
}
|
|
1790
|
+
/* Variant of 'R_fopen' compiled when USE_RC_FOPEN is defined.
   Forwards the first element of 'fname' and the mode to 'RC_fopen', with its
   'expand' argument set to FALSE. */
FILE* R_fopen(Rcpp::CharacterVector fname, const char *mode)
{
    SEXP first_path = fname[0];
    return RC_fopen(first_path, mode, FALSE);
}
|
|
1794
|
+
#endif
|
|
1795
|
+
|
|
1796
|
+
#ifdef USE_SIMPLE_FOPEN
|
|
1797
|
+
/* Variant of 'R_fopen' compiled when USE_SIMPLE_FOPEN is defined.
   Opens the path given by the first element of 'fname' with plain 'fopen'. */
FILE* R_fopen(Rcpp::CharacterVector fname, const char *mode)
{
    const char *path = fname[0];
    return fopen(path, mode);
}
|
|
1801
|
+
#endif
|
|
1802
|
+
|
|
1803
|
+
class FileOpener
|
|
1804
|
+
{
|
|
1805
|
+
public:
|
|
1806
|
+
FILE *handle = NULL;
|
|
1807
|
+
FileOpener(const SEXP fname, const char *mode)
|
|
1808
|
+
{
|
|
1809
|
+
if (this->handle != NULL)
|
|
1810
|
+
this->close_file();
|
|
1811
|
+
this->handle = R_fopen(fname, mode);
|
|
1812
|
+
}
|
|
1813
|
+
FILE *get_handle()
|
|
1814
|
+
{
|
|
1815
|
+
return this->handle;
|
|
1816
|
+
}
|
|
1817
|
+
void close_file()
|
|
1818
|
+
{
|
|
1819
|
+
if (this->handle != NULL) {
|
|
1820
|
+
fclose(this->handle);
|
|
1821
|
+
this->handle = NULL;
|
|
1822
|
+
}
|
|
1823
|
+
}
|
|
1824
|
+
~FileOpener()
|
|
1825
|
+
{
|
|
1826
|
+
this->close_file();
|
|
1827
|
+
}
|
|
1828
|
+
};
|
|
1829
|
+
|
|
1830
|
+
// [[Rcpp::export]]
|
|
1831
|
+
void serialize_to_file
|
|
1832
|
+
(
|
|
1833
|
+
Rcpp::RawVector serialized_obj,
|
|
1834
|
+
Rcpp::RawVector serialized_imputer,
|
|
1835
|
+
Rcpp::RawVector serialized_indexer,
|
|
1836
|
+
bool is_extended,
|
|
1837
|
+
Rcpp::RawVector metadata,
|
|
1838
|
+
Rcpp::CharacterVector fname
|
|
1839
|
+
)
|
|
1840
|
+
{
|
|
1841
|
+
FileOpener file_(fname[0], "wb");
|
|
1842
|
+
FILE *output_file = file_.get_handle();
|
|
1843
|
+
serialize_combined(
|
|
1844
|
+
is_extended? nullptr : (char*)RAW(serialized_obj),
|
|
1845
|
+
is_extended? (char*)RAW(serialized_obj) : nullptr,
|
|
1846
|
+
serialized_imputer.size()? (char*)RAW(serialized_imputer) : nullptr,
|
|
1847
|
+
serialized_indexer.size()? (char*)RAW(serialized_indexer) : nullptr,
|
|
1848
|
+
metadata.size()? (char*)RAW(metadata) : nullptr,
|
|
1849
|
+
metadata.size(),
|
|
1850
|
+
output_file
|
|
1851
|
+
);
|
|
1852
|
+
}
|
|
1853
|
+
|
|
1854
|
+
// [[Rcpp::export]]
|
|
1855
|
+
Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname)
|
|
1856
|
+
{
|
|
1857
|
+
Rcpp::List out = Rcpp::List::create(
|
|
1858
|
+
Rcpp::_["ptr"] = R_NilValue,
|
|
1859
|
+
Rcpp::_["serialized"] = R_NilValue,
|
|
1860
|
+
Rcpp::_["imp_ptr"] = R_NilValue,
|
|
1861
|
+
Rcpp::_["imp_ser"] = R_NilValue,
|
|
1862
|
+
Rcpp::_["indexer"] = R_NilValue,
|
|
1863
|
+
Rcpp::_["ind_ser"] = R_NilValue,
|
|
1864
|
+
Rcpp::_["metadata"] = R_NilValue
|
|
1865
|
+
);
|
|
1866
|
+
|
|
1867
|
+
FileOpener file_(fname[0], "rb");
|
|
1868
|
+
FILE *input_file = file_.get_handle();
|
|
1869
|
+
|
|
1870
|
+
bool is_isotree_model;
|
|
1871
|
+
bool is_compatible;
|
|
1872
|
+
bool has_combined_objects;
|
|
1873
|
+
bool has_IsoForest;
|
|
1874
|
+
bool has_ExtIsoForest;
|
|
1875
|
+
bool has_Imputer;
|
|
1876
|
+
bool has_Indexer;
|
|
1877
|
+
bool has_metadata;
|
|
1878
|
+
size_t size_metadata;
|
|
1879
|
+
|
|
1880
|
+
inspect_serialized_object(
|
|
1881
|
+
input_file,
|
|
1882
|
+
is_isotree_model,
|
|
1883
|
+
is_compatible,
|
|
1884
|
+
has_combined_objects,
|
|
1885
|
+
has_IsoForest,
|
|
1886
|
+
has_ExtIsoForest,
|
|
1887
|
+
has_Imputer,
|
|
1888
|
+
has_Indexer,
|
|
1889
|
+
has_metadata,
|
|
1890
|
+
size_metadata
|
|
1891
|
+
);
|
|
1892
|
+
|
|
1893
|
+
if (!is_isotree_model || !has_combined_objects)
|
|
1894
|
+
Rcpp::stop("Input file is not a serialized isotree model.\n");
|
|
1895
|
+
if (!is_compatible)
|
|
1896
|
+
Rcpp::stop("Model file format is incompatible.\n");
|
|
1897
|
+
if (!size_metadata)
|
|
1898
|
+
Rcpp::stop("Input file does not contain metadata.\n");
|
|
1899
|
+
|
|
1900
|
+
out["metadata"] = Rcpp::unwindProtect(alloc_RawVec, (void*)&size_metadata);
|
|
1901
|
+
|
|
1902
|
+
std::unique_ptr<IsoForest> model(new IsoForest());
|
|
1903
|
+
std::unique_ptr<ExtIsoForest> model_ext(new ExtIsoForest());
|
|
1904
|
+
std::unique_ptr<Imputer> imputer(new Imputer());
|
|
1905
|
+
std::unique_ptr<TreesIndexer> indexer(new TreesIndexer());
|
|
1906
|
+
|
|
1907
|
+
IsoForest *ptr_model = NULL;
|
|
1908
|
+
ExtIsoForest *ptr_model_ext = NULL;
|
|
1909
|
+
Imputer *ptr_imputer = NULL;
|
|
1910
|
+
TreesIndexer *ptr_indexer = NULL;
|
|
1911
|
+
char *ptr_metadata = (char*)RAW(out["metadata"]);
|
|
1912
|
+
|
|
1913
|
+
if (has_IsoForest)
|
|
1914
|
+
ptr_model = model.get();
|
|
1915
|
+
if (has_ExtIsoForest)
|
|
1916
|
+
ptr_model_ext = model_ext.get();
|
|
1917
|
+
if (has_Imputer)
|
|
1918
|
+
ptr_imputer = imputer.get();
|
|
1919
|
+
if (has_Indexer)
|
|
1920
|
+
ptr_indexer = indexer.get();
|
|
1921
|
+
|
|
1922
|
+
deserialize_combined(
|
|
1923
|
+
input_file,
|
|
1924
|
+
ptr_model,
|
|
1925
|
+
ptr_model_ext,
|
|
1926
|
+
ptr_imputer,
|
|
1927
|
+
ptr_indexer,
|
|
1928
|
+
ptr_metadata
|
|
1929
|
+
);
|
|
1930
|
+
|
|
1931
|
+
if (has_IsoForest)
|
|
1932
|
+
out["serialized"] = serialize_cpp_obj(model.get());
|
|
1933
|
+
else
|
|
1934
|
+
out["serialized"] = serialize_cpp_obj(model_ext.get());
|
|
1935
|
+
if (has_Imputer)
|
|
1936
|
+
out["imp_ser"] = serialize_cpp_obj(imputer.get());
|
|
1937
|
+
if (has_Indexer)
|
|
1938
|
+
out["ind_ser"] = serialize_cpp_obj(indexer.get());
|
|
1939
|
+
|
|
1940
|
+
if (has_IsoForest) {
|
|
1941
|
+
out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get());
|
|
1942
|
+
model.release();
|
|
1943
|
+
}
|
|
1944
|
+
else {
|
|
1945
|
+
out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get());
|
|
1946
|
+
model_ext.release();
|
|
1947
|
+
}
|
|
1948
|
+
if (has_Imputer) {
|
|
1949
|
+
out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get());
|
|
1950
|
+
imputer.release();
|
|
1951
|
+
}
|
|
1952
|
+
if (has_Indexer) {
|
|
1953
|
+
out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get());
|
|
1954
|
+
indexer.release();
|
|
1955
|
+
}
|
|
1956
|
+
|
|
1957
|
+
return out;
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
/* The functions below make up for missing functionality in the
|
|
1961
|
+
'Matrix' and 'SparseM' packages for sub-setting the data */
|
|
1962
|
+
|
|
1963
|
+
// [[Rcpp::export(rng = false)]]
|
|
1964
|
+
void call_sort_csc_indices(Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr)
|
|
1965
|
+
{
|
|
1966
|
+
size_t ncols_numeric = Xc_indptr.size() - 1;
|
|
1967
|
+
sort_csc_indices(REAL(Xc), INTEGER(Xc_ind), INTEGER(Xc_indptr), ncols_numeric);
|
|
1968
|
+
}
|
|
1969
|
+
|
|
1970
|
+
// [[Rcpp::export(rng = false)]]
|
|
1971
|
+
void call_reconstruct_csr_sliced
|
|
1972
|
+
(
|
|
1973
|
+
Rcpp::NumericVector orig_Xr, Rcpp::IntegerVector orig_Xr_indptr,
|
|
1974
|
+
Rcpp::NumericVector rec_Xr, Rcpp::IntegerVector rec_Xr_indptr,
|
|
1975
|
+
size_t nrows
|
|
1976
|
+
)
|
|
1977
|
+
{
|
|
1978
|
+
reconstruct_csr_sliced<double, int>(
|
|
1979
|
+
REAL(orig_Xr), INTEGER(orig_Xr_indptr),
|
|
1980
|
+
REAL(rec_Xr), INTEGER(rec_Xr_indptr),
|
|
1981
|
+
nrows
|
|
1982
|
+
);
|
|
1983
|
+
}
|
|
1984
|
+
|
|
1985
|
+
// [[Rcpp::export(rng = false)]]
|
|
1986
|
+
void call_reconstruct_csr_with_categ
|
|
1987
|
+
(
|
|
1988
|
+
Rcpp::NumericVector orig_Xr, Rcpp::IntegerVector orig_Xr_ind, Rcpp::IntegerVector orig_Xr_indptr,
|
|
1989
|
+
Rcpp::NumericVector rec_Xr, Rcpp::IntegerVector rec_Xr_ind, Rcpp::IntegerVector rec_Xr_indptr,
|
|
1990
|
+
Rcpp::IntegerVector rec_X_cat,
|
|
1991
|
+
Rcpp::IntegerVector cols_numeric, Rcpp::IntegerVector cols_categ,
|
|
1992
|
+
size_t nrows, size_t ncols
|
|
1993
|
+
)
|
|
1994
|
+
{
|
|
1995
|
+
reconstruct_csr_with_categ<double, int, int>(
|
|
1996
|
+
REAL(orig_Xr), INTEGER(orig_Xr_ind), INTEGER(orig_Xr_indptr),
|
|
1997
|
+
REAL(rec_Xr), INTEGER(rec_Xr_ind), INTEGER(rec_Xr_indptr),
|
|
1998
|
+
INTEGER(rec_X_cat), true,
|
|
1999
|
+
INTEGER(cols_numeric), INTEGER(cols_categ),
|
|
2000
|
+
nrows, ncols, cols_numeric.size(), cols_categ.size()
|
|
2001
|
+
);
|
|
2002
|
+
}
|
|
2003
|
+
|
|
2004
|
+
// [[Rcpp::export(rng = false)]]
|
|
2005
|
+
Rcpp::NumericVector deepcopy_vector(Rcpp::NumericVector inp)
|
|
2006
|
+
{
|
|
2007
|
+
return Rcpp::NumericVector(inp.begin(), inp.end());
|
|
2008
|
+
}
|
|
2009
|
+
|
|
2010
|
+
/* Converts a CSC sparse matrix into a dense integer matrix.

   Parameters
   - Xc, Xc_ind, Xc_indptr: values, row indices and column pointers of the
     CSC matrix; the number of columns is the length of 'Xc_indptr' minus one.
   - nrows: number of rows of the output.

   Each stored value is truncated to 'int'; values that are negative or NaN
   are written as -1. Positions with no stored value are left as the matrix
   constructor initialized them. The output is filled in column-major order. */
Rcpp::IntegerMatrix csc_to_dense_int
(
    Rcpp::NumericVector Xc,
    Rcpp::IntegerVector Xc_ind,
    Rcpp::IntegerVector Xc_indptr,
    size_t nrows
)
{
    size_t ncols = Xc_indptr.size() - 1;
    Rcpp::IntegerMatrix out_(nrows, ncols);
    int *restrict out = INTEGER(out_);

    for (size_t col = 0; col < ncols; col++)
    {
        const size_t col_offset = col * nrows;
        const int ix_end = Xc_indptr[col+1];
        for (int ix = Xc_indptr[col]; ix < ix_end; ix++)
        {
            const double value = Xc[ix];
            int as_categ = -1;
            if (!ISNAN(value) && value >= 0)
                as_categ = (int)value;
            out[(size_t)Xc_ind[ix] + col_offset] = as_categ;
        }
    }
    return out_;
}
|
|
2031
|
+
|
|
2032
|
+
template <class real_vec, class int_vec>
|
|
2033
|
+
Rcpp::IntegerMatrix csr_to_dense_int
|
|
2034
|
+
(
|
|
2035
|
+
real_vec Xr,
|
|
2036
|
+
int_vec Xr_ind,
|
|
2037
|
+
int_vec Xr_indptr,
|
|
2038
|
+
int ncols
|
|
2039
|
+
)
|
|
2040
|
+
{
|
|
2041
|
+
size_t nrows = Xr_indptr.size() - 1;
|
|
2042
|
+
size_t matrix_dims[] = {nrows, (size_t)ncols};
|
|
2043
|
+
Rcpp::IntegerMatrix out_ = Rcpp::unwindProtect(safe_int_matrix, (void*)matrix_dims);
|
|
2044
|
+
int *restrict out = INTEGER(out_);
|
|
2045
|
+
for (size_t row = 0; row < nrows; row++)
|
|
2046
|
+
{
|
|
2047
|
+
for (auto ix = Xr_indptr[row]; ix < Xr_indptr[row+1]; ix++)
|
|
2048
|
+
out[row + (size_t)Xr_ind[ix]*nrows]
|
|
2049
|
+
=
|
|
2050
|
+
(Xr[ix] >= 0 && !ISNAN(Xr[ix]))?
|
|
2051
|
+
(int)Xr[ix] : (int)(-1);
|
|
2052
|
+
}
|
|
2053
|
+
return out_;
|
|
2054
|
+
}
|
|
2055
|
+
|
|
2056
|
+
// [[Rcpp::export(rng = false)]]
|
|
2057
|
+
Rcpp::List call_take_cols_by_slice_csr
|
|
2058
|
+
(
|
|
2059
|
+
Rcpp::NumericVector Xr_,
|
|
2060
|
+
Rcpp::IntegerVector Xr_ind_,
|
|
2061
|
+
Rcpp::IntegerVector Xr_indptr,
|
|
2062
|
+
int ncols_take,
|
|
2063
|
+
bool as_dense
|
|
2064
|
+
)
|
|
2065
|
+
{
|
|
2066
|
+
/* Indices need to be sorted beforehand */
|
|
2067
|
+
double *restrict Xr = REAL(Xr_);
|
|
2068
|
+
int *restrict Xr_ind = INTEGER(Xr_ind_);
|
|
2069
|
+
size_t nrows = Xr_indptr.size() - 1;
|
|
2070
|
+
Rcpp::IntegerVector out_Xr_indptr(nrows+1);
|
|
2071
|
+
out_Xr_indptr[0] = 0;
|
|
2072
|
+
size_t total_size = 0;
|
|
2073
|
+
for (size_t row = 0; row < nrows; row++)
|
|
2074
|
+
{
|
|
2075
|
+
for (auto col = Xr_indptr[row]; col < Xr_indptr[row+1]; col++)
|
|
2076
|
+
total_size += Xr_ind[col] < ncols_take;
|
|
2077
|
+
out_Xr_indptr[row+1] = total_size;
|
|
2078
|
+
}
|
|
2079
|
+
|
|
2080
|
+
Rcpp::NumericVector out_Xr_(total_size);
|
|
2081
|
+
Rcpp::IntegerVector out_Xr_ind_(total_size);
|
|
2082
|
+
double *restrict out_Xr = REAL(out_Xr_);
|
|
2083
|
+
int *restrict out_Xr_ind = INTEGER(out_Xr_ind_);
|
|
2084
|
+
|
|
2085
|
+
size_t n_this;
|
|
2086
|
+
for (size_t row = 0; row < nrows; row++)
|
|
2087
|
+
{
|
|
2088
|
+
n_this = out_Xr_indptr[row+1] - out_Xr_indptr[row];
|
|
2089
|
+
if (n_this) {
|
|
2090
|
+
std::copy(Xr + Xr_indptr[row],
|
|
2091
|
+
Xr + Xr_indptr[row] + n_this,
|
|
2092
|
+
out_Xr + out_Xr_indptr[row]);
|
|
2093
|
+
std::copy(Xr_ind + Xr_indptr[row],
|
|
2094
|
+
Xr_ind + Xr_indptr[row] + n_this,
|
|
2095
|
+
out_Xr_ind + out_Xr_indptr[row]);
|
|
2096
|
+
}
|
|
2097
|
+
}
|
|
2098
|
+
|
|
2099
|
+
if (!as_dense)
|
|
2100
|
+
return Rcpp::List::create(
|
|
2101
|
+
Rcpp::_["Xr"] = out_Xr_,
|
|
2102
|
+
Rcpp::_["Xr_ind"] = out_Xr_ind_,
|
|
2103
|
+
Rcpp::_["Xr_indptr"] = out_Xr_indptr
|
|
2104
|
+
);
|
|
2105
|
+
else
|
|
2106
|
+
return Rcpp::List::create(
|
|
2107
|
+
Rcpp::_["X_cat"] = csr_to_dense_int(out_Xr_,
|
|
2108
|
+
out_Xr_ind_,
|
|
2109
|
+
out_Xr_indptr,
|
|
2110
|
+
ncols_take)
|
|
2111
|
+
);
|
|
2112
|
+
}
|
|
2113
|
+
|
|
2114
|
+
// [[Rcpp::export(rng = false)]]
|
|
2115
|
+
Rcpp::List call_take_cols_by_index_csr
|
|
2116
|
+
(
|
|
2117
|
+
Rcpp::NumericVector Xr,
|
|
2118
|
+
Rcpp::IntegerVector Xr_ind,
|
|
2119
|
+
Rcpp::IntegerVector Xr_indptr,
|
|
2120
|
+
Rcpp::IntegerVector cols_take,
|
|
2121
|
+
bool as_dense
|
|
2122
|
+
)
|
|
2123
|
+
{
|
|
2124
|
+
Rcpp::List out;
|
|
2125
|
+
if (!as_dense) {
|
|
2126
|
+
out = Rcpp::List::create(
|
|
2127
|
+
Rcpp::_["Xr"] = R_NilValue,
|
|
2128
|
+
Rcpp::_["Xr_ind"] = R_NilValue,
|
|
2129
|
+
Rcpp::_["Xr_indptr"] = R_NilValue
|
|
2130
|
+
);
|
|
2131
|
+
}
|
|
2132
|
+
else {
|
|
2133
|
+
out = Rcpp::List::create(
|
|
2134
|
+
Rcpp::_["X_cat"] = R_NilValue
|
|
2135
|
+
);
|
|
2136
|
+
}
|
|
2137
|
+
|
|
2138
|
+
|
|
2139
|
+
/* 'cols_take' should be sorted */
|
|
2140
|
+
int n_take = cols_take.size();
|
|
2141
|
+
int nrows = Xr_indptr.size() - 1;
|
|
2142
|
+
std::vector<double> out_Xr;
|
|
2143
|
+
std::vector<int> out_Xr_ind;
|
|
2144
|
+
std::vector<int> out_Xr_indptr(nrows + 1);
|
|
2145
|
+
|
|
2146
|
+
int *curr_ptr;
|
|
2147
|
+
int *end_ptr;
|
|
2148
|
+
int *restrict ptr_Xr_ind = INTEGER(Xr_ind);
|
|
2149
|
+
int *restrict ptr_cols_take = INTEGER(cols_take);
|
|
2150
|
+
int *restrict ptr_cols_take_end = ptr_cols_take + n_take;
|
|
2151
|
+
int curr_col;
|
|
2152
|
+
int *search_res;
|
|
2153
|
+
|
|
2154
|
+
for (int row = 0; row < nrows; row++)
|
|
2155
|
+
{
|
|
2156
|
+
curr_ptr = ptr_Xr_ind + Xr_indptr[row];
|
|
2157
|
+
end_ptr = ptr_Xr_ind + Xr_indptr[row+1];
|
|
2158
|
+
curr_col = 0;
|
|
2159
|
+
|
|
2160
|
+
if (end_ptr == curr_ptr + 1)
|
|
2161
|
+
{
|
|
2162
|
+
search_res = std::lower_bound(ptr_cols_take, ptr_cols_take_end, *curr_ptr);
|
|
2163
|
+
curr_col = std::distance(ptr_cols_take, search_res);
|
|
2164
|
+
if (curr_col < n_take && *search_res == *curr_ptr)
|
|
2165
|
+
{
|
|
2166
|
+
out_Xr.push_back(Xr[std::distance(ptr_Xr_ind, curr_ptr)]);
|
|
2167
|
+
out_Xr_ind.push_back(curr_col);
|
|
2168
|
+
}
|
|
2169
|
+
}
|
|
2170
|
+
|
|
2171
|
+
else
|
|
2172
|
+
if (end_ptr > curr_ptr)
|
|
2173
|
+
{
|
|
2174
|
+
while (true)
|
|
2175
|
+
{
|
|
2176
|
+
curr_ptr = std::lower_bound(curr_ptr, end_ptr, ptr_cols_take[curr_col]);
|
|
2177
|
+
|
|
2178
|
+
if (curr_ptr >= end_ptr)
|
|
2179
|
+
{
|
|
2180
|
+
break;
|
|
2181
|
+
}
|
|
2182
|
+
|
|
2183
|
+
|
|
2184
|
+
else if (*curr_ptr == ptr_cols_take[curr_col])
|
|
2185
|
+
{
|
|
2186
|
+
out_Xr.push_back(Xr[std::distance(ptr_Xr_ind, curr_ptr)]);
|
|
2187
|
+
out_Xr_ind.push_back(curr_col);
|
|
2188
|
+
curr_ptr++;
|
|
2189
|
+
curr_col++;
|
|
2190
|
+
|
|
2191
|
+
if (curr_ptr >= end_ptr || curr_col >= n_take)
|
|
2192
|
+
break;
|
|
2193
|
+
}
|
|
2194
|
+
|
|
2195
|
+
|
|
2196
|
+
else
|
|
2197
|
+
{
|
|
2198
|
+
curr_col = std::distance(
|
|
2199
|
+
ptr_cols_take,
|
|
2200
|
+
std::lower_bound(ptr_cols_take + curr_col, ptr_cols_take_end, *curr_ptr)
|
|
2201
|
+
);
|
|
2202
|
+
|
|
2203
|
+
if (curr_col >= n_take)
|
|
2204
|
+
break;
|
|
2205
|
+
|
|
2206
|
+
if (curr_col == *curr_ptr) {
|
|
2207
|
+
out_Xr.push_back(Xr[std::distance(ptr_Xr_ind, curr_ptr)]);
|
|
2208
|
+
out_Xr_ind.push_back(curr_col);
|
|
2209
|
+
curr_ptr++;
|
|
2210
|
+
curr_col++;
|
|
2211
|
+
}
|
|
2212
|
+
|
|
2213
|
+
if (curr_ptr >= end_ptr || curr_col >= n_take)
|
|
2214
|
+
break;
|
|
2215
|
+
}
|
|
2216
|
+
}
|
|
2217
|
+
}
|
|
2218
|
+
|
|
2219
|
+
out_Xr_indptr[row+1] = out_Xr.size();
|
|
2220
|
+
}
|
|
2221
|
+
|
|
2222
|
+
if (!as_dense)
|
|
2223
|
+
{
|
|
2224
|
+
out["Xr"] = Rcpp::unwindProtect(safe_copy_vec, (void*)&out_Xr);
|
|
2225
|
+
out["Xr_ind"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xr_ind);
|
|
2226
|
+
out["Xr_indptr"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xr_indptr);
|
|
2227
|
+
}
|
|
2228
|
+
|
|
2229
|
+
else
|
|
2230
|
+
{
|
|
2231
|
+
out["X_cat"] = csr_to_dense_int(out_Xr,
|
|
2232
|
+
out_Xr_ind,
|
|
2233
|
+
out_Xr_indptr,
|
|
2234
|
+
n_take);
|
|
2235
|
+
}
|
|
2236
|
+
|
|
2237
|
+
return out;
|
|
2238
|
+
}
|
|
2239
|
+
|
|
2240
|
+
// [[Rcpp::export(rng = false)]]
|
|
2241
|
+
Rcpp::List call_take_cols_by_slice_csc
|
|
2242
|
+
(
|
|
2243
|
+
Rcpp::NumericVector Xc,
|
|
2244
|
+
Rcpp::IntegerVector Xc_ind,
|
|
2245
|
+
Rcpp::IntegerVector Xc_indptr,
|
|
2246
|
+
size_t ncols_take,
|
|
2247
|
+
bool as_dense, size_t nrows
|
|
2248
|
+
)
|
|
2249
|
+
{
|
|
2250
|
+
Rcpp::IntegerVector out_Xc_indptr(ncols_take+1);
|
|
2251
|
+
size_t total_size = Xc_indptr[ncols_take+1];
|
|
2252
|
+
Rcpp::NumericVector out_Xc(REAL(Xc), REAL(Xc) + total_size);
|
|
2253
|
+
Rcpp::IntegerVector out_Xc_ind(INTEGER(Xc_ind), INTEGER(Xc_ind) + total_size);
|
|
2254
|
+
|
|
2255
|
+
if (!as_dense)
|
|
2256
|
+
return Rcpp::List::create(
|
|
2257
|
+
Rcpp::_["Xc"] = out_Xc,
|
|
2258
|
+
Rcpp::_["Xc_ind"] = out_Xc_ind,
|
|
2259
|
+
Rcpp::_["Xc_indptr"] = out_Xc_indptr
|
|
2260
|
+
);
|
|
2261
|
+
else
|
|
2262
|
+
return Rcpp::List::create(
|
|
2263
|
+
Rcpp::_["X_cat"] = csc_to_dense_int(out_Xc,
|
|
2264
|
+
out_Xc_ind,
|
|
2265
|
+
out_Xc_indptr,
|
|
2266
|
+
nrows)
|
|
2267
|
+
);
|
|
2268
|
+
}
|
|
2269
|
+
|
|
2270
|
+
// [[Rcpp::export(rng = false)]]
|
|
2271
|
+
Rcpp::List call_take_cols_by_index_csc
|
|
2272
|
+
(
|
|
2273
|
+
Rcpp::NumericVector Xc_,
|
|
2274
|
+
Rcpp::IntegerVector Xc_ind_,
|
|
2275
|
+
Rcpp::IntegerVector Xc_indptr,
|
|
2276
|
+
Rcpp::IntegerVector cols_take,
|
|
2277
|
+
bool as_dense, size_t nrows
|
|
2278
|
+
)
|
|
2279
|
+
{
|
|
2280
|
+
/* 'cols_take' should be sorted */
|
|
2281
|
+
double *restrict Xc = REAL(Xc_);
|
|
2282
|
+
int *restrict Xc_ind = INTEGER(Xc_ind_);
|
|
2283
|
+
size_t n_take = cols_take.size();
|
|
2284
|
+
Rcpp::IntegerVector out_Xc_indptr(n_take+1);
|
|
2285
|
+
size_t total_size = 0;
|
|
2286
|
+
|
|
2287
|
+
for (size_t col = 0; col < n_take; col++)
|
|
2288
|
+
total_size += Xc_indptr[cols_take[col]+1] - Xc_indptr[cols_take[col]];
|
|
2289
|
+
|
|
2290
|
+
Rcpp::NumericVector out_Xc_(total_size);
|
|
2291
|
+
Rcpp::IntegerVector out_Xc_ind_(total_size);
|
|
2292
|
+
double *restrict out_Xc = REAL(out_Xc_);
|
|
2293
|
+
int *restrict out_Xc_ind = INTEGER(out_Xc_ind_);
|
|
2294
|
+
|
|
2295
|
+
total_size = 0;
|
|
2296
|
+
size_t n_this;
|
|
2297
|
+
out_Xc_indptr[0] = 0;
|
|
2298
|
+
for (size_t col = 0; col < n_take; col++)
|
|
2299
|
+
{
|
|
2300
|
+
n_this = Xc_indptr[cols_take[col]+1] - Xc_indptr[cols_take[col]];
|
|
2301
|
+
if (n_this) {
|
|
2302
|
+
std::copy(Xc + Xc_indptr[cols_take[col]],
|
|
2303
|
+
Xc + Xc_indptr[cols_take[col]] + n_this,
|
|
2304
|
+
out_Xc + total_size);
|
|
2305
|
+
std::copy(Xc_ind + Xc_indptr[cols_take[col]],
|
|
2306
|
+
Xc_ind + Xc_indptr[cols_take[col]] + n_this,
|
|
2307
|
+
out_Xc_ind + total_size);
|
|
2308
|
+
}
|
|
2309
|
+
total_size += n_this;
|
|
2310
|
+
out_Xc_indptr[col+1] = total_size;
|
|
2311
|
+
}
|
|
2312
|
+
|
|
2313
|
+
if (!as_dense)
|
|
2314
|
+
return Rcpp::List::create(
|
|
2315
|
+
Rcpp::_["Xc"] = out_Xc_,
|
|
2316
|
+
Rcpp::_["Xc_ind"] = out_Xc_ind_,
|
|
2317
|
+
Rcpp::_["Xc_indptr"] = out_Xc_indptr
|
|
2318
|
+
);
|
|
2319
|
+
else
|
|
2320
|
+
return Rcpp::List::create(
|
|
2321
|
+
Rcpp::_["X_cat"] = csc_to_dense_int(out_Xc_,
|
|
2322
|
+
out_Xc_ind_,
|
|
2323
|
+
out_Xc_indptr,
|
|
2324
|
+
nrows)
|
|
2325
|
+
);
|
|
2326
|
+
}
|
|
2327
|
+
|
|
2328
|
+
// [[Rcpp::export(rng = false)]]
|
|
2329
|
+
void copy_csc_cols_by_slice
|
|
2330
|
+
(
|
|
2331
|
+
Rcpp::NumericVector out_Xc_,
|
|
2332
|
+
Rcpp::IntegerVector out_Xc_indptr,
|
|
2333
|
+
Rcpp::NumericVector from_Xc_,
|
|
2334
|
+
Rcpp::IntegerVector from_Xc_indptr,
|
|
2335
|
+
size_t n_copy
|
|
2336
|
+
)
|
|
2337
|
+
{
|
|
2338
|
+
size_t total_size = from_Xc_indptr[n_copy+1];
|
|
2339
|
+
std::copy(REAL(from_Xc_), REAL(from_Xc_) + total_size, REAL(out_Xc_));
|
|
2340
|
+
}
|
|
2341
|
+
|
|
2342
|
+
// [[Rcpp::export(rng = false)]]
|
|
2343
|
+
void copy_csc_cols_by_index
|
|
2344
|
+
(
|
|
2345
|
+
Rcpp::NumericVector out_Xc_,
|
|
2346
|
+
Rcpp::IntegerVector out_Xc_indptr,
|
|
2347
|
+
Rcpp::NumericVector from_Xc_,
|
|
2348
|
+
Rcpp::IntegerVector from_Xc_indptr,
|
|
2349
|
+
Rcpp::IntegerVector cols_copy
|
|
2350
|
+
)
|
|
2351
|
+
{
|
|
2352
|
+
size_t n_copy = cols_copy.size();
|
|
2353
|
+
double *restrict out_Xc = REAL(out_Xc_);
|
|
2354
|
+
double *restrict from_Xc = REAL(from_Xc_);
|
|
2355
|
+
|
|
2356
|
+
for (size_t col = 0; col < n_copy; col++)
|
|
2357
|
+
{
|
|
2358
|
+
std::copy(from_Xc + from_Xc_indptr[col],
|
|
2359
|
+
from_Xc + from_Xc_indptr[col+1],
|
|
2360
|
+
out_Xc + out_Xc_indptr[cols_copy[col]]);
|
|
2361
|
+
}
|
|
2362
|
+
}
|
|
2363
|
+
|
|
2364
|
+
|
|
2365
|
+
// [[Rcpp::export(rng = false)]]
|
|
2366
|
+
Rcpp::List assign_csc_cols
|
|
2367
|
+
(
|
|
2368
|
+
Rcpp::NumericVector Xc_,
|
|
2369
|
+
Rcpp::IntegerVector Xc_ind_,
|
|
2370
|
+
Rcpp::IntegerVector Xc_indptr,
|
|
2371
|
+
Rcpp::IntegerVector X_cat_,
|
|
2372
|
+
Rcpp::IntegerVector cols_categ,
|
|
2373
|
+
Rcpp::IntegerVector cols_numeric,
|
|
2374
|
+
size_t nrows
|
|
2375
|
+
)
|
|
2376
|
+
{
|
|
2377
|
+
Rcpp::List out = Rcpp::List::create(
|
|
2378
|
+
Rcpp::_["Xc"] = R_NilValue,
|
|
2379
|
+
Rcpp::_["Xc_ind"] = R_NilValue,
|
|
2380
|
+
Rcpp::_["Xc_indptr"] = R_NilValue
|
|
2381
|
+
);
|
|
2382
|
+
size_t ncols_tot = (size_t)cols_categ.size() + (size_t)cols_numeric.size();
|
|
2383
|
+
std::vector<double> out_Xc;
|
|
2384
|
+
std::vector<int> out_Xc_ind;
|
|
2385
|
+
std::vector<int> out_Xc_indptr(ncols_tot + 1);
|
|
2386
|
+
|
|
2387
|
+
double *restrict Xc = REAL(Xc_);
|
|
2388
|
+
int *restrict Xc_ind = INTEGER(Xc_ind_);
|
|
2389
|
+
int *restrict X_cat = INTEGER(X_cat_);
|
|
2390
|
+
|
|
2391
|
+
hashed_set<int> cols_categ_set(INTEGER(cols_categ), INTEGER(cols_categ) + cols_categ.size());
|
|
2392
|
+
hashed_set<int> cols_numeric_set(INTEGER(cols_numeric), INTEGER(cols_numeric) + cols_numeric.size());
|
|
2393
|
+
|
|
2394
|
+
size_t curr_num = 0;
|
|
2395
|
+
size_t curr_cat = 0;
|
|
2396
|
+
bool has_zeros;
|
|
2397
|
+
size_t curr_size;
|
|
2398
|
+
|
|
2399
|
+
for (size_t col = 0; col < ncols_tot; col++)
|
|
2400
|
+
{
|
|
2401
|
+
if (is_in_set((int)col, cols_numeric_set))
|
|
2402
|
+
{
|
|
2403
|
+
std::copy(Xc + Xc_indptr[curr_num],
|
|
2404
|
+
Xc + Xc_indptr[curr_num+1],
|
|
2405
|
+
std::back_inserter(out_Xc));
|
|
2406
|
+
std::copy(Xc_ind + Xc_indptr[curr_num],
|
|
2407
|
+
Xc_ind + Xc_indptr[curr_num+1],
|
|
2408
|
+
std::back_inserter(out_Xc_ind));
|
|
2409
|
+
curr_num++;
|
|
2410
|
+
}
|
|
2411
|
+
|
|
2412
|
+
else if (is_in_set((int)col, cols_categ_set))
|
|
2413
|
+
{
|
|
2414
|
+
has_zeros = false;
|
|
2415
|
+
for (size_t row = 0; row < nrows; row++)
|
|
2416
|
+
if (X_cat[row + (size_t)curr_cat*nrows] == 0)
|
|
2417
|
+
has_zeros = true;
|
|
2418
|
+
|
|
2419
|
+
if (!has_zeros) {
|
|
2420
|
+
std::copy(X_cat + (size_t)curr_cat*nrows,
|
|
2421
|
+
X_cat + ((size_t)curr_cat+1)*nrows,
|
|
2422
|
+
std::back_inserter(out_Xc));
|
|
2423
|
+
curr_size = out_Xc_ind.size();
|
|
2424
|
+
out_Xc_ind.resize(curr_size + (size_t)nrows);
|
|
2425
|
+
std::iota(out_Xc_ind.begin() + curr_size, out_Xc_ind.end(), (int)0);
|
|
2426
|
+
}
|
|
2427
|
+
|
|
2428
|
+
else {
|
|
2429
|
+
for (size_t row = 0; row < nrows; row++) {
|
|
2430
|
+
if (X_cat[row + (size_t)curr_cat*nrows] > 0) {
|
|
2431
|
+
out_Xc.push_back(X_cat[row + (size_t)curr_cat*nrows]);
|
|
2432
|
+
out_Xc_ind.push_back((int)row);
|
|
2433
|
+
}
|
|
2434
|
+
}
|
|
2435
|
+
}
|
|
2436
|
+
|
|
2437
|
+
curr_cat++;
|
|
2438
|
+
}
|
|
2439
|
+
|
|
2440
|
+
out_Xc_indptr[col+1] = out_Xc.size();
|
|
2441
|
+
}
|
|
2442
|
+
|
|
2443
|
+
|
|
2444
|
+
out["Xc"] = Rcpp::unwindProtect(safe_copy_vec, (void*)&out_Xc);
|
|
2445
|
+
out["Xc_ind"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xc_ind);
|
|
2446
|
+
out["Xc_indptr"] = Rcpp::unwindProtect(safe_copy_intvec, (void*)&out_Xc_indptr);
|
|
2447
|
+
return out;
|
|
2448
|
+
}
|
|
2449
|
+
|
|
2450
|
+
/* These are helpers for dealing with large integers and R's copy-on-write semantics */
|
|
2451
|
+
|
|
2452
|
+
// [[Rcpp::export(rng = false)]]
|
|
2453
|
+
Rcpp::NumericVector get_empty_tmat(int nrows_)
|
|
2454
|
+
{
|
|
2455
|
+
size_t nrows = (size_t)nrows_;
|
|
2456
|
+
size_t tmat_size = (nrows * (nrows - (size_t)1)) / (size_t)2;
|
|
2457
|
+
return Rcpp::NumericVector((R_xlen_t)tmat_size);
|
|
2458
|
+
}
|
|
2459
|
+
|
|
2460
|
+
// [[Rcpp::export(rng = false)]]
|
|
2461
|
+
Rcpp::IntegerMatrix get_empty_int_mat(int nrows, int ncols)
|
|
2462
|
+
{
|
|
2463
|
+
return Rcpp::IntegerMatrix(nrows, ncols);
|
|
2464
|
+
}
|
|
2465
|
+
|
|
2466
|
+
// [[Rcpp::export(rng = false)]]
|
|
2467
|
+
Rcpp::IntegerMatrix get_null_int_mat()
|
|
2468
|
+
{
|
|
2469
|
+
return Rcpp::IntegerMatrix(0, 0);
|
|
2470
|
+
}
|
|
2471
|
+
|
|
2472
|
+
// [[Rcpp::export(rng = false)]]
|
|
2473
|
+
int get_ntrees(SEXP model_R_ptr, bool is_extended)
|
|
2474
|
+
{
|
|
2475
|
+
if (is_extended) {
|
|
2476
|
+
ExtIsoForest* ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
2477
|
+
return ext_model_ptr->hplanes.size();
|
|
2478
|
+
}
|
|
2479
|
+
|
|
2480
|
+
else {
|
|
2481
|
+
IsoForest* model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr));
|
|
2482
|
+
return model_ptr->trees.size();
|
|
2483
|
+
}
|
|
2484
|
+
}
|
|
2485
|
+
|
|
2486
|
+
// [[Rcpp::export(rng = false)]]
|
|
2487
|
+
SEXP deepcopy_int(SEXP x)
|
|
2488
|
+
{
|
|
2489
|
+
return Rf_ScalarInteger(Rf_asInteger(x));
|
|
2490
|
+
}
|
|
2491
|
+
|
|
2492
|
+
// [[Rcpp::export(rng = false)]]
|
|
2493
|
+
void modify_R_list_inplace(SEXP lst, int ix, SEXP el)
|
|
2494
|
+
{
|
|
2495
|
+
SET_VECTOR_ELT(lst, ix, el);
|
|
2496
|
+
}
|
|
2497
|
+
|
|
2498
|
+
// [[Rcpp::export(rng = false)]]
|
|
2499
|
+
void addto_R_list_inplace(Rcpp::List &lst, Rcpp::String nm, SEXP el)
|
|
2500
|
+
{
|
|
2501
|
+
lst[nm] = el;
|
|
2502
|
+
}
|
|
2503
|
+
|
|
2504
|
+
|
|
2505
|
+
/* Reports whether this translation unit was compiled with OpenMP enabled,
   i.e. whether the '_OPENMP' macro was defined at build time. */
// [[Rcpp::export(rng = false)]]
bool R_has_openmp()
{
    #ifdef _OPENMP
    const bool built_with_openmp = true;
    #else
    const bool built_with_openmp = false;
    #endif
    return built_with_openmp;
}
|
|
851
2514
|
|
|
852
2515
|
#endif /* _FOR_R */
|