isotree 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/LICENSE.txt +2 -2
- data/README.md +32 -14
- data/ext/isotree/ext.cpp +144 -31
- data/ext/isotree/extconf.rb +7 -7
- data/lib/isotree/isolation_forest.rb +110 -30
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +165 -27
- data/vendor/isotree/include/isotree.hpp +2111 -0
- data/vendor/isotree/include/isotree_oop.hpp +394 -0
- data/vendor/isotree/inst/COPYRIGHTS +62 -0
- data/vendor/isotree/src/RcppExports.cpp +525 -52
- data/vendor/isotree/src/Rwrapper.cpp +1931 -268
- data/vendor/isotree/src/c_interface.cpp +953 -0
- data/vendor/isotree/src/crit.hpp +4232 -0
- data/vendor/isotree/src/dist.hpp +1886 -0
- data/vendor/isotree/src/exp_depth_table.hpp +134 -0
- data/vendor/isotree/src/extended.hpp +1444 -0
- data/vendor/isotree/src/external_facing_generic.hpp +399 -0
- data/vendor/isotree/src/fit_model.hpp +2401 -0
- data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
- data/vendor/isotree/src/helpers_iforest.hpp +813 -0
- data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
- data/vendor/isotree/src/indexer.cpp +515 -0
- data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
- data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
- data/vendor/isotree/src/isoforest.hpp +1659 -0
- data/vendor/isotree/src/isotree.hpp +1804 -392
- data/vendor/isotree/src/isotree_exportable.hpp +99 -0
- data/vendor/isotree/src/merge_models.cpp +159 -16
- data/vendor/isotree/src/mult.hpp +1321 -0
- data/vendor/isotree/src/oop_interface.cpp +842 -0
- data/vendor/isotree/src/oop_interface.hpp +278 -0
- data/vendor/isotree/src/other_helpers.hpp +219 -0
- data/vendor/isotree/src/predict.hpp +1932 -0
- data/vendor/isotree/src/python_helpers.hpp +134 -0
- data/vendor/isotree/src/ref_indexer.hpp +154 -0
- data/vendor/isotree/src/robinmap/LICENSE +21 -0
- data/vendor/isotree/src/robinmap/README.md +483 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
- data/vendor/isotree/src/serialize.cpp +4300 -139
- data/vendor/isotree/src/sql.cpp +141 -59
- data/vendor/isotree/src/subset_models.cpp +174 -0
- data/vendor/isotree/src/utils.hpp +3808 -0
- data/vendor/isotree/src/xoshiro.hpp +467 -0
- data/vendor/isotree/src/ziggurat.hpp +405 -0
- metadata +38 -104
- data/vendor/cereal/LICENSE +0 -24
- data/vendor/cereal/README.md +0 -85
- data/vendor/cereal/include/cereal/access.hpp +0 -351
- data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
- data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
- data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
- data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
- data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
- data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
- data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
- data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
- data/vendor/cereal/include/cereal/details/util.hpp +0 -84
- data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
- data/vendor/cereal/include/cereal/macros.hpp +0 -154
- data/vendor/cereal/include/cereal/specialize.hpp +0 -139
- data/vendor/cereal/include/cereal/types/array.hpp +0 -79
- data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
- data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
- data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
- data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
- data/vendor/cereal/include/cereal/types/common.hpp +0 -129
- data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
- data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
- data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
- data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
- data/vendor/cereal/include/cereal/types/list.hpp +0 -62
- data/vendor/cereal/include/cereal/types/map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
- data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
- data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
- data/vendor/cereal/include/cereal/types/set.hpp +0 -103
- data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
- data/vendor/cereal/include/cereal/types/string.hpp +0 -61
- data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
- data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
- data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
- data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
- data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
- data/vendor/cereal/include/cereal/version.hpp +0 -52
- data/vendor/isotree/src/Makevars +0 -4
- data/vendor/isotree/src/crit.cpp +0 -912
- data/vendor/isotree/src/dist.cpp +0 -749
- data/vendor/isotree/src/extended.cpp +0 -790
- data/vendor/isotree/src/fit_model.cpp +0 -1090
- data/vendor/isotree/src/helpers_iforest.cpp +0 -324
- data/vendor/isotree/src/isoforest.cpp +0 -771
- data/vendor/isotree/src/mult.cpp +0 -607
- data/vendor/isotree/src/predict.cpp +0 -853
- data/vendor/isotree/src/utils.cpp +0 -1566
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/* Isolation forests and variations thereof, with adjustments for incorporation
|
|
2
|
+
* of categorical variables and missing values.
|
|
3
|
+
* Written for C++11 standard and aimed at being used in R and Python.
|
|
4
|
+
*
|
|
5
|
+
* This library is based on the following works:
|
|
6
|
+
* [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
|
7
|
+
* "Isolation forest."
|
|
8
|
+
* 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008.
|
|
9
|
+
* [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
|
10
|
+
* "Isolation-based anomaly detection."
|
|
11
|
+
* ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3.
|
|
12
|
+
* [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner.
|
|
13
|
+
* "Extended Isolation Forest."
|
|
14
|
+
* arXiv preprint arXiv:1811.02141 (2018).
|
|
15
|
+
* [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
|
16
|
+
* "On detecting clustered anomalies using SCiForest."
|
|
17
|
+
* Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010.
|
|
18
|
+
* [5] https://sourceforge.net/projects/iforest/
|
|
19
|
+
* [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
|
|
20
|
+
* [7] Quinlan, J. Ross. C4.5: programs for machine learning. Elsevier, 2014.
|
|
21
|
+
* [8] Cortes, David.
|
|
22
|
+
* "Distance approximation using Isolation Forests."
|
|
23
|
+
* arXiv preprint arXiv:1910.12362 (2019).
|
|
24
|
+
* [9] Cortes, David.
|
|
25
|
+
* "Imputing missing values with unsupervised random trees."
|
|
26
|
+
* arXiv preprint arXiv:1911.06646 (2019).
|
|
27
|
+
* [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
|
|
28
|
+
* [11] Cortes, David.
|
|
29
|
+
* "Revisiting randomized choices in isolation forests."
|
|
30
|
+
* arXiv preprint arXiv:2110.13402 (2021).
|
|
31
|
+
* [12] Guha, Sudipto, et al.
|
|
32
|
+
* "Robust random cut forest based anomaly detection on streams."
|
|
33
|
+
* International conference on machine learning. PMLR, 2016.
|
|
34
|
+
* [13] Cortes, David.
|
|
35
|
+
* "Isolation forests: looking beyond tree depth."
|
|
36
|
+
* arXiv preprint arXiv:2111.11639 (2021).
|
|
37
|
+
* [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
|
|
38
|
+
* "Isolation kernel and its effect on SVM."
|
|
39
|
+
* Proceedings of the 24th ACM SIGKDD
|
|
40
|
+
* International Conference on Knowledge Discovery & Data Mining. 2018.
|
|
41
|
+
*
|
|
42
|
+
* BSD 2-Clause License
|
|
43
|
+
* Copyright (c) 2019-2022, David Cortes
|
|
44
|
+
* All rights reserved.
|
|
45
|
+
* Redistribution and use in source and binary forms, with or without
|
|
46
|
+
* modification, are permitted provided that the following conditions are met:
|
|
47
|
+
* * Redistributions of source code must retain the above copyright notice, this
|
|
48
|
+
* list of conditions and the following disclaimer.
|
|
49
|
+
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
50
|
+
* this list of conditions and the following disclaimer in the documentation
|
|
51
|
+
* and/or other materials provided with the distribution.
|
|
52
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
53
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
54
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
55
|
+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
56
|
+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
57
|
+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
58
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
59
|
+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
60
|
+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
61
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
62
|
+
*/
|
|
63
|
+
|
|
64
|
+
/* Exported, non-template overload of 'fit_iforest'.
 *
 * Hands every argument, unchanged and in declaration order, to the
 * 'fit_iforest<real_t, sparse_ix>' template and returns whatever that
 * call returns.
 *
 * NOTE(review): 'real_t' and 'sparse_ix' are not declared in this block;
 * presumably they are set by whatever includes this header - confirm.
 */
ISOTREE_EXPORTED int fit_iforest(
    IsoForest *model_outputs,
    ExtIsoForest *model_outputs_ext,
    real_t numeric_data[],
    size_t ncols_numeric,
    int categ_data[],
    size_t ncols_categ,
    int ncat[],
    real_t Xc[],
    sparse_ix Xc_ind[],
    sparse_ix Xc_indptr[],
    size_t ndim,
    size_t ntry,
    CoefType coef_type,
    bool coef_by_prop,
    real_t sample_weights[],
    bool with_replacement,
    bool weight_as_sample,
    size_t nrows,
    size_t sample_size,
    size_t ntrees,
    size_t max_depth,
    size_t ncols_per_tree,
    bool limit_depth,
    bool penalize_range,
    bool standardize_data,
    ScoringMetric scoring_metric,
    bool fast_bratio,
    bool standardize_dist,
    double tmat[],
    double output_depths[],
    bool standardize_depth,
    real_t col_weights[],
    bool weigh_by_kurt,
    double prob_pick_by_gain_pl,
    double prob_pick_by_gain_avg,
    double prob_pick_by_full_gain,
    double prob_pick_by_dens,
    double prob_pick_col_by_range,
    double prob_pick_col_by_var,
    double prob_pick_col_by_kurt,
    double min_gain,
    MissingAction missing_action,
    CategSplit cat_split_type,
    NewCategAction new_cat_action,
    bool all_perm,
    Imputer *imputer,
    size_t min_imp_obs,
    UseDepthImp depth_imp,
    WeighImpRows weigh_imp_rows,
    bool impute_at_fit,
    uint64_t random_seed,
    bool use_long_double,
    int nthreads)
{
    /* Explicit template arguments select the templated implementation
       rather than this overload. */
    const int status = fit_iforest<real_t, sparse_ix>(
        model_outputs,
        model_outputs_ext,
        numeric_data,
        ncols_numeric,
        categ_data,
        ncols_categ,
        ncat,
        Xc,
        Xc_ind,
        Xc_indptr,
        ndim,
        ntry,
        coef_type,
        coef_by_prop,
        sample_weights,
        with_replacement,
        weight_as_sample,
        nrows,
        sample_size,
        ntrees,
        max_depth,
        ncols_per_tree,
        limit_depth,
        penalize_range,
        standardize_data,
        scoring_metric,
        fast_bratio,
        standardize_dist,
        tmat,
        output_depths,
        standardize_depth,
        col_weights,
        weigh_by_kurt,
        prob_pick_by_gain_pl,
        prob_pick_by_gain_avg,
        prob_pick_by_full_gain,
        prob_pick_by_dens,
        prob_pick_col_by_range,
        prob_pick_col_by_var,
        prob_pick_col_by_kurt,
        min_gain,
        missing_action,
        cat_split_type,
        new_cat_action,
        all_perm,
        imputer,
        min_imp_obs,
        depth_imp,
        weigh_imp_rows,
        impute_at_fit,
        random_seed,
        use_long_double,
        nthreads);
    return status;
}
|
|
111
|
+
/* Exported, non-template overload of 'add_tree'.
 *
 * Hands every argument, unchanged and in declaration order, to the
 * 'add_tree<real_t, sparse_ix>' template and returns whatever that
 * call returns.
 *
 * NOTE(review): 'real_t' and 'sparse_ix' are not declared in this block;
 * presumably they are set by whatever includes this header - confirm.
 */
ISOTREE_EXPORTED int add_tree(
    IsoForest *model_outputs,
    ExtIsoForest *model_outputs_ext,
    real_t numeric_data[],
    size_t ncols_numeric,
    int categ_data[],
    size_t ncols_categ,
    int ncat[],
    real_t Xc[],
    sparse_ix Xc_ind[],
    sparse_ix Xc_indptr[],
    size_t ndim,
    size_t ntry,
    CoefType coef_type,
    bool coef_by_prop,
    real_t sample_weights[],
    size_t nrows,
    size_t max_depth,
    size_t ncols_per_tree,
    bool limit_depth,
    bool penalize_range,
    bool standardize_data,
    bool fast_bratio,
    real_t col_weights[],
    bool weigh_by_kurt,
    double prob_pick_by_gain_pl,
    double prob_pick_by_gain_avg,
    double prob_pick_by_full_gain,
    double prob_pick_by_dens,
    double prob_pick_col_by_range,
    double prob_pick_col_by_var,
    double prob_pick_col_by_kurt,
    double min_gain,
    MissingAction missing_action,
    CategSplit cat_split_type,
    NewCategAction new_cat_action,
    UseDepthImp depth_imp,
    WeighImpRows weigh_imp_rows,
    bool all_perm,
    Imputer *imputer,
    size_t min_imp_obs,
    TreesIndexer *indexer,
    real_t ref_numeric_data[],
    int ref_categ_data[],
    bool ref_is_col_major,
    size_t ref_ld_numeric,
    size_t ref_ld_categ,
    real_t ref_Xc[],
    sparse_ix ref_Xc_ind[],
    sparse_ix ref_Xc_indptr[],
    uint64_t random_seed,
    bool use_long_double)
{
    /* Explicit template arguments select the templated implementation
       rather than this overload. */
    const int status = add_tree<real_t, sparse_ix>(
        model_outputs,
        model_outputs_ext,
        numeric_data,
        ncols_numeric,
        categ_data,
        ncols_categ,
        ncat,
        Xc,
        Xc_ind,
        Xc_indptr,
        ndim,
        ntry,
        coef_type,
        coef_by_prop,
        sample_weights,
        nrows,
        max_depth,
        ncols_per_tree,
        limit_depth,
        penalize_range,
        standardize_data,
        fast_bratio,
        col_weights,
        weigh_by_kurt,
        prob_pick_by_gain_pl,
        prob_pick_by_gain_avg,
        prob_pick_by_full_gain,
        prob_pick_by_dens,
        prob_pick_col_by_range,
        prob_pick_col_by_var,
        prob_pick_col_by_kurt,
        min_gain,
        missing_action,
        cat_split_type,
        new_cat_action,
        depth_imp,
        weigh_imp_rows,
        all_perm,
        imputer,
        min_imp_obs,
        indexer,
        ref_numeric_data,
        ref_categ_data,
        ref_is_col_major,
        ref_ld_numeric,
        ref_ld_categ,
        ref_Xc,
        ref_Xc_ind,
        ref_Xc_indptr,
        random_seed,
        use_long_double);
    return status;
}
|
|
160
|
+
/* Exported, non-template overload of 'predict_iforest'.
 *
 * Passes every argument, unchanged and in declaration order, to the
 * 'predict_iforest<real_t, sparse_ix>' template. Nothing is returned.
 *
 * NOTE(review): 'real_t' and 'sparse_ix' are not declared in this block;
 * presumably they are set by whatever includes this header - confirm.
 */
ISOTREE_EXPORTED void predict_iforest(
    real_t numeric_data[],
    int categ_data[],
    bool is_col_major,
    size_t ncols_numeric,
    size_t ncols_categ,
    real_t Xc[],
    sparse_ix Xc_ind[],
    sparse_ix Xc_indptr[],
    real_t Xr[],
    sparse_ix Xr_ind[],
    sparse_ix Xr_indptr[],
    size_t nrows,
    int nthreads,
    bool standardize,
    IsoForest *model_outputs,
    ExtIsoForest *model_outputs_ext,
    double output_depths[],
    sparse_ix tree_num[],
    double per_tree_depths[],
    TreesIndexer *indexer)
{
    /* Explicit template arguments select the templated implementation
       rather than this overload. */
    predict_iforest<real_t, sparse_ix>(
        numeric_data,
        categ_data,
        is_col_major,
        ncols_numeric,
        ncols_categ,
        Xc,
        Xc_ind,
        Xc_indptr,
        Xr,
        Xr_ind,
        Xr_indptr,
        nrows,
        nthreads,
        standardize,
        model_outputs,
        model_outputs_ext,
        output_depths,
        tree_num,
        per_tree_depths,
        indexer);
}
|
|
181
|
+
/* Exported, non-template overload of 'calc_similarity'.
 *
 * Passes every argument, unchanged and in declaration order, to the
 * 'calc_similarity<real_t, sparse_ix>' template. Nothing is returned.
 *
 * NOTE(review): 'real_t' and 'sparse_ix' are not declared in this block;
 * presumably they are set by whatever includes this header - confirm.
 */
ISOTREE_EXPORTED void calc_similarity(
    real_t numeric_data[],
    int categ_data[],
    real_t Xc[],
    sparse_ix Xc_ind[],
    sparse_ix Xc_indptr[],
    size_t nrows,
    bool use_long_double,
    int nthreads,
    bool assume_full_distr,
    bool standardize_dist,
    bool as_kernel,
    IsoForest *model_outputs,
    ExtIsoForest *model_outputs_ext,
    double tmat[],
    double rmat[],
    size_t n_from,
    bool use_indexed_references,
    TreesIndexer *indexer,
    bool is_col_major,
    size_t ld_numeric,
    size_t ld_categ)
{
    /* Explicit template arguments select the templated implementation
       rather than this overload. */
    calc_similarity<real_t, sparse_ix>(
        numeric_data,
        categ_data,
        Xc,
        Xc_ind,
        Xc_indptr,
        nrows,
        use_long_double,
        nthreads,
        assume_full_distr,
        standardize_dist,
        as_kernel,
        model_outputs,
        model_outputs_ext,
        tmat,
        rmat,
        n_from,
        use_indexed_references,
        indexer,
        is_col_major,
        ld_numeric,
        ld_categ);
}
|
|
198
|
+
/* Exported, non-template overload of 'impute_missing_values'.
 *
 * Passes every argument, unchanged and in declaration order, to the
 * 'impute_missing_values<real_t, sparse_ix>' template. Nothing is returned.
 * Unlike the model arguments, 'imputer' is taken by reference here.
 *
 * NOTE(review): 'real_t' and 'sparse_ix' are not declared in this block;
 * presumably they are set by whatever includes this header - confirm.
 */
ISOTREE_EXPORTED void impute_missing_values(
    real_t numeric_data[],
    int categ_data[],
    bool is_col_major,
    real_t Xr[],
    sparse_ix Xr_ind[],
    sparse_ix Xr_indptr[],
    size_t nrows,
    bool use_long_double,
    int nthreads,
    IsoForest *model_outputs,
    ExtIsoForest *model_outputs_ext,
    Imputer &imputer)
{
    /* Explicit template arguments select the templated implementation
       rather than this overload. */
    impute_missing_values<real_t, sparse_ix>(
        numeric_data,
        categ_data,
        is_col_major,
        Xr,
        Xr_ind,
        Xr_indptr,
        nrows,
        use_long_double,
        nthreads,
        model_outputs,
        model_outputs_ext,
        imputer);
}
|
|
211
|
+
|
|
212
|
+
/* Exported, non-template overload of 'set_reference_points'.
 *
 * Passes every argument, unchanged and in declaration order, to the
 * 'set_reference_points<real_t, sparse_ix>' template. Nothing is returned.
 *
 * NOTE(review): 'real_t' and 'sparse_ix' are not declared in this block;
 * presumably they are set by whatever includes this header - confirm.
 */
ISOTREE_EXPORTED void set_reference_points(
    IsoForest *model_outputs,
    ExtIsoForest *model_outputs_ext,
    TreesIndexer *indexer,
    const bool with_distances,
    real_t *numeric_data,
    int *categ_data,
    bool is_col_major,
    size_t ld_numeric,
    size_t ld_categ,
    real_t *Xc,
    sparse_ix *Xc_ind,
    sparse_ix *Xc_indptr,
    real_t *Xr,
    sparse_ix *Xr_ind,
    sparse_ix *Xr_indptr,
    size_t nrows,
    int nthreads)
{
    /* Explicit template arguments select the templated implementation
       rather than this overload. */
    set_reference_points<real_t, sparse_ix>(
        model_outputs,
        model_outputs_ext,
        indexer,
        with_distances,
        numeric_data,
        categ_data,
        is_col_major,
        ld_numeric,
        ld_categ,
        Xc,
        Xc_ind,
        Xc_indptr,
        Xr,
        Xr_ind,
        Xr_indptr,
        nrows,
        nthreads);
}
|
|
229
|
+
|
|
230
|
+
#ifndef _NO_REAL_T
|
|
231
|
+
/* Exported, non-template overload of 'get_num_nodes' for 'IsoForest' models.
 *
 * Forwards all four arguments unchanged to 'get_num_nodes<sparse_ix>'.
 * Declared 'noexcept'; nothing is returned.
 */
ISOTREE_EXPORTED void get_num_nodes(
    IsoForest &model_outputs,
    sparse_ix *n_nodes,
    sparse_ix *n_terminal,
    int nthreads) noexcept
{
    get_num_nodes<sparse_ix>(
        model_outputs,
        n_nodes,
        n_terminal,
        nthreads);
}
|
|
235
|
+
/* Exported, non-template overload of 'get_num_nodes' for 'ExtIsoForest' models.
 *
 * Forwards all four arguments unchanged to 'get_num_nodes<sparse_ix>'.
 * Declared 'noexcept'; nothing is returned.
 */
ISOTREE_EXPORTED void get_num_nodes(
    ExtIsoForest &model_outputs,
    sparse_ix *n_nodes,
    sparse_ix *n_terminal,
    int nthreads) noexcept
{
    get_num_nodes<sparse_ix>(
        model_outputs,
        n_nodes,
        n_terminal,
        nthreads);
}
|
|
239
|
+
#endif
|
|
240
|
+
|