isotree 0.2.2 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (151) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2111 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +62 -0
  14. data/vendor/isotree/src/RcppExports.cpp +525 -52
  15. data/vendor/isotree/src/Rwrapper.cpp +1931 -268
  16. data/vendor/isotree/src/c_interface.cpp +953 -0
  17. data/vendor/isotree/src/crit.hpp +4232 -0
  18. data/vendor/isotree/src/dist.hpp +1886 -0
  19. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  20. data/vendor/isotree/src/extended.hpp +1444 -0
  21. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  22. data/vendor/isotree/src/fit_model.hpp +2401 -0
  23. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  24. data/vendor/isotree/src/helpers_iforest.hpp +813 -0
  25. data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
  26. data/vendor/isotree/src/indexer.cpp +515 -0
  27. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  28. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  29. data/vendor/isotree/src/isoforest.hpp +1659 -0
  30. data/vendor/isotree/src/isotree.hpp +1804 -392
  31. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  32. data/vendor/isotree/src/merge_models.cpp +159 -16
  33. data/vendor/isotree/src/mult.hpp +1321 -0
  34. data/vendor/isotree/src/oop_interface.cpp +842 -0
  35. data/vendor/isotree/src/oop_interface.hpp +278 -0
  36. data/vendor/isotree/src/other_helpers.hpp +219 -0
  37. data/vendor/isotree/src/predict.hpp +1932 -0
  38. data/vendor/isotree/src/python_helpers.hpp +134 -0
  39. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  40. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  41. data/vendor/isotree/src/robinmap/README.md +483 -0
  42. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  46. data/vendor/isotree/src/serialize.cpp +4300 -139
  47. data/vendor/isotree/src/sql.cpp +141 -59
  48. data/vendor/isotree/src/subset_models.cpp +174 -0
  49. data/vendor/isotree/src/utils.hpp +3808 -0
  50. data/vendor/isotree/src/xoshiro.hpp +467 -0
  51. data/vendor/isotree/src/ziggurat.hpp +405 -0
  52. metadata +38 -104
  53. data/vendor/cereal/LICENSE +0 -24
  54. data/vendor/cereal/README.md +0 -85
  55. data/vendor/cereal/include/cereal/access.hpp +0 -351
  56. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  57. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  58. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  59. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  60. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  61. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  62. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  63. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  65. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  66. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  67. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  68. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  69. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  70. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  71. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  72. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  74. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  76. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  77. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  78. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  79. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  91. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  92. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  94. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  96. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  97. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  98. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  99. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  100. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  101. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  102. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  103. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  104. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  105. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  106. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  107. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  111. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  112. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  113. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  114. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  115. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  116. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  117. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  118. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  119. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  120. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  121. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  122. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  123. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  124. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  125. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  126. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  127. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  128. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  129. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  130. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  131. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  132. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  133. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  134. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  135. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  136. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  137. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  138. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  139. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  140. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  141. data/vendor/cereal/include/cereal/version.hpp +0 -52
  142. data/vendor/isotree/src/Makevars +0 -4
  143. data/vendor/isotree/src/crit.cpp +0 -912
  144. data/vendor/isotree/src/dist.cpp +0 -749
  145. data/vendor/isotree/src/extended.cpp +0 -790
  146. data/vendor/isotree/src/fit_model.cpp +0 -1090
  147. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  148. data/vendor/isotree/src/isoforest.cpp +0 -771
  149. data/vendor/isotree/src/mult.cpp +0 -607
  150. data/vendor/isotree/src/predict.cpp +0 -853
  151. data/vendor/isotree/src/utils.cpp +0 -1566
@@ -0,0 +1,842 @@
1
+ /* Isolation forests and variations thereof, with adjustments for incorporation
2
+ * of categorical variables and missing values.
3
+ * Written for C++11 standard and aimed at being used in R and Python.
4
+ *
5
+ * This library is based on the following works:
6
+ * [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
7
+ * "Isolation forest."
8
+ * 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008.
9
+ * [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
10
+ * "Isolation-based anomaly detection."
11
+ * ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3.
12
+ * [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner.
13
+ * "Extended Isolation Forest."
14
+ * arXiv preprint arXiv:1811.02141 (2018).
15
+ * [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
16
+ * "On detecting clustered anomalies using SCiForest."
17
+ * Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010.
18
+ * [5] https://sourceforge.net/projects/iforest/
19
+ * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
20
+ * [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
21
+ * [8] Cortes, David.
22
+ * "Distance approximation using Isolation Forests."
23
+ * arXiv preprint arXiv:1910.12362 (2019).
24
+ * [9] Cortes, David.
25
+ * "Imputing missing values with unsupervised random trees."
26
+ * arXiv preprint arXiv:1911.06646 (2019).
27
+ * [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
28
+ * [11] Cortes, David.
29
+ * "Revisiting randomized choices in isolation forests."
30
+ * arXiv preprint arXiv:2110.13402 (2021).
31
+ * [12] Guha, Sudipto, et al.
32
+ * "Robust random cut forest based anomaly detection on streams."
33
+ * International conference on machine learning. PMLR, 2016.
34
+ * [13] Cortes, David.
35
+ * "Isolation forests: looking beyond tree depth."
36
+ * arXiv preprint arXiv:2111.11639 (2021).
37
+ * [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
38
+ * "Isolation kernel and its effect on SVM"
39
+ * Proceedings of the 24th ACM SIGKDD
40
+ * International Conference on Knowledge Discovery & Data Mining. 2018.
41
+ *
42
+ * BSD 2-Clause License
43
+ * Copyright (c) 2019-2022, David Cortes
44
+ * All rights reserved.
45
+ * Redistribution and use in source and binary forms, with or without
46
+ * modification, are permitted provided that the following conditions are met:
47
+ * * Redistributions of source code must retain the above copyright notice, this
48
+ * list of conditions and the following disclaimer.
49
+ * * Redistributions in binary form must reproduce the above copyright notice,
50
+ * this list of conditions and the following disclaimer in the documentation
51
+ * and/or other materials provided with the distribution.
52
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
53
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
55
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
56
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
58
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
59
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
60
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62
+ */
63
+ #if !defined(_FOR_R) && !defined(_FOR_PYTHON)
64
+ #include "isotree.hpp"
65
+ #include "isotree_exportable.hpp"
66
+ #include "oop_interface.hpp"
67
+ using namespace isotree;
68
+
69
+ IsolationForest::IsolationForest
70
+ (
71
+ size_t ndim, size_t ntry, CoefType coef_type, bool coef_by_prop,
72
+ bool with_replacement, bool weight_as_sample,
73
+ size_t sample_size, size_t ntrees,
74
+ size_t max_depth, size_t ncols_per_tree, bool limit_depth,
75
+ bool penalize_range, bool standardize_data,
76
+ ScoringMetric scoring_metric, bool fast_bratio, bool weigh_by_kurt,
77
+ double prob_pick_by_gain_pl, double prob_pick_by_gain_avg,
78
+ double prob_pick_by_full_gain, double prob_pick_by_dens,
79
+ double prob_pick_col_by_range, double prob_pick_col_by_var,
80
+ double prob_pick_col_by_kurt,
81
+ double min_gain, MissingAction missing_action,
82
+ CategSplit cat_split_type, NewCategAction new_cat_action,
83
+ bool all_perm, bool build_imputer, size_t min_imp_obs,
84
+ UseDepthImp depth_imp, WeighImpRows weigh_imp_rows,
85
+ uint64_t random_seed, int nthreads
86
+ )
87
+ :
88
+ ndim(ndim),
89
+ ntry(ntry),
90
+ coef_type(coef_type),
91
+ coef_by_prop(coef_by_prop),
92
+ with_replacement(with_replacement),
93
+ weight_as_sample(weight_as_sample),
94
+ sample_size(sample_size),
95
+ ntrees(ntrees),
96
+ max_depth(max_depth),
97
+ ncols_per_tree(ncols_per_tree),
98
+ limit_depth(limit_depth),
99
+ penalize_range(penalize_range),
100
+ standardize_data(standardize_data),
101
+ scoring_metric(scoring_metric),
102
+ fast_bratio(fast_bratio),
103
+ weigh_by_kurt(weigh_by_kurt),
104
+ prob_pick_by_gain_pl(prob_pick_by_gain_pl),
105
+ prob_pick_by_gain_avg(prob_pick_by_gain_avg),
106
+ prob_pick_by_full_gain(prob_pick_by_full_gain),
107
+ prob_pick_by_dens(prob_pick_by_dens),
108
+ prob_pick_col_by_range(prob_pick_col_by_range),
109
+ prob_pick_col_by_var(prob_pick_col_by_var),
110
+ prob_pick_col_by_kurt(prob_pick_col_by_kurt),
111
+ min_gain(min_gain),
112
+ missing_action(missing_action),
113
+ cat_split_type(cat_split_type),
114
+ new_cat_action(new_cat_action),
115
+ all_perm(all_perm),
116
+ build_imputer(build_imputer),
117
+ min_imp_obs(min_imp_obs),
118
+ depth_imp(depth_imp),
119
+ weigh_imp_rows(weigh_imp_rows),
120
+ random_seed(random_seed)
121
+ {}
122
+
123
+
124
+ void IsolationForest::fit(double X[], size_t nrows, size_t ncols)
125
+ {
126
+ this->check_params();
127
+ this->override_previous_fit();
128
+
129
+ auto retcode = fit_iforest(
130
+ (this->ndim == 1)? &this->model : nullptr,
131
+ (this->ndim != 1)? &this->model_ext : nullptr,
132
+ X, ncols,
133
+ (int*)nullptr, (size_t)0, (int*)nullptr,
134
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
135
+ this->ndim, this->ntry, this->coef_type, this->coef_by_prop,
136
+ (double*)nullptr, this->with_replacement, this->weight_as_sample,
137
+ nrows, this->sample_size, this->ntrees,
138
+ this->max_depth, this->ncols_per_tree,
139
+ this->limit_depth, this->penalize_range, this->standardize_data,
140
+ this->scoring_metric, this->fast_bratio,
141
+ false, (double*)nullptr,
142
+ (double*)nullptr, true,
143
+ (double*)nullptr, this->weigh_by_kurt,
144
+ this->prob_pick_by_gain_pl,
145
+ this->prob_pick_by_gain_avg,
146
+ this->prob_pick_by_full_gain,
147
+ this->prob_pick_by_dens,
148
+ this->prob_pick_col_by_range,
149
+ this->prob_pick_col_by_var,
150
+ this->prob_pick_col_by_kurt,
151
+ this->min_gain, this->missing_action,
152
+ this->cat_split_type, this->new_cat_action,
153
+ this->all_perm, &this->imputer, this->min_imp_obs,
154
+ this->depth_imp, this->weigh_imp_rows, false,
155
+ this->random_seed, false, this->nthreads
156
+ );
157
+ if (retcode != EXIT_SUCCESS) unexpected_error();
158
+ this->is_fitted = true;
159
+ }
160
+
161
+ void IsolationForest::fit(double numeric_data[], size_t ncols_numeric, size_t nrows,
162
+ int categ_data[], size_t ncols_categ, int ncat[],
163
+ double sample_weights[], double col_weights[])
164
+ {
165
+ this->check_params();
166
+ this->override_previous_fit();
167
+
168
+ auto retcode = fit_iforest(
169
+ (this->ndim == 1)? &this->model : nullptr,
170
+ (this->ndim != 1)? &this->model_ext : nullptr,
171
+ numeric_data, ncols_numeric,
172
+ categ_data, ncols_categ, ncat,
173
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
174
+ this->ndim, this->ntry, this->coef_type, this->coef_by_prop,
175
+ sample_weights, this->with_replacement, this->weight_as_sample,
176
+ nrows, this->sample_size, this->ntrees,
177
+ this->max_depth, this->ncols_per_tree,
178
+ this->limit_depth, this->penalize_range, this->standardize_data,
179
+ this->scoring_metric, this->fast_bratio,
180
+ false, (double*)nullptr,
181
+ (double*)nullptr, true,
182
+ col_weights, this->weigh_by_kurt,
183
+ this->prob_pick_by_gain_pl,
184
+ this->prob_pick_by_gain_avg,
185
+ this->prob_pick_by_full_gain,
186
+ this->prob_pick_by_dens,
187
+ this->prob_pick_col_by_range,
188
+ this->prob_pick_col_by_var,
189
+ this->prob_pick_col_by_kurt,
190
+ this->min_gain, this->missing_action,
191
+ this->cat_split_type, this->new_cat_action,
192
+ this->all_perm, &this->imputer, this->min_imp_obs,
193
+ this->depth_imp, this->weigh_imp_rows, false,
194
+ this->random_seed, false, this->nthreads
195
+ );
196
+ if (retcode != EXIT_SUCCESS) unexpected_error();
197
+ this->is_fitted = true;
198
+ }
199
+
200
+ void IsolationForest::fit(double Xc[], int Xc_ind[], int Xc_indptr[],
201
+ size_t ncols_numeric, size_t nrows,
202
+ int categ_data[], size_t ncols_categ, int ncat[],
203
+ double sample_weights[], double col_weights[])
204
+ {
205
+ this->check_params();
206
+ this->override_previous_fit();
207
+
208
+ auto retcode = fit_iforest(
209
+ (this->ndim == 1)? &this->model : nullptr,
210
+ (this->ndim != 1)? &this->model_ext : nullptr,
211
+ (double*)nullptr, ncols_numeric,
212
+ categ_data, ncols_categ, ncat,
213
+ Xc, Xc_ind, Xc_indptr,
214
+ this->ndim, this->ntry, this->coef_type, this->coef_by_prop,
215
+ sample_weights, this->with_replacement, this->weight_as_sample,
216
+ nrows, this->sample_size, this->ntrees,
217
+ this->max_depth, this->ncols_per_tree,
218
+ this->limit_depth, this->penalize_range, this->standardize_data,
219
+ this->scoring_metric, this->fast_bratio,
220
+ false, (double*)nullptr,
221
+ (double*)nullptr, true,
222
+ col_weights, this->weigh_by_kurt,
223
+ this->prob_pick_by_gain_pl,
224
+ this->prob_pick_by_gain_avg,
225
+ this->prob_pick_by_full_gain,
226
+ this->prob_pick_by_dens,
227
+ this->prob_pick_col_by_range,
228
+ this->prob_pick_col_by_var,
229
+ this->prob_pick_col_by_kurt,
230
+ this->min_gain, this->missing_action,
231
+ this->cat_split_type, this->new_cat_action,
232
+ this->all_perm, &this->imputer, this->min_imp_obs,
233
+ this->depth_imp, this->weigh_imp_rows, false,
234
+ this->random_seed, false, this->nthreads
235
+ );
236
+ if (retcode != EXIT_SUCCESS) unexpected_error();
237
+ this->is_fitted = true;
238
+ }
239
+
240
+ std::vector<double> IsolationForest::predict(double X[], size_t nrows, bool standardize)
241
+ {
242
+ this->check_is_fitted();
243
+ this->check_nthreads();
244
+ std::vector<double> out(nrows);
245
+ predict_iforest(
246
+ X, (int*)nullptr,
247
+ true, (size_t)0, (size_t)0,
248
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
249
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
250
+ nrows, this->nthreads, standardize,
251
+ (!this->model.trees.empty())? &this->model : nullptr,
252
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
253
+ out.data(), (int*)nullptr, (double*)nullptr,
254
+ (TreesIndexer*)nullptr);
255
+ return out;
256
+ }
257
+
258
+ void IsolationForest::predict(double numeric_data[], int categ_data[], bool is_col_major,
259
+ size_t nrows, size_t ld_numeric, size_t ld_categ, bool standardize,
260
+ double output_depths[], int tree_num[], double per_tree_depths[])
261
+ {
262
+ this->check_is_fitted();
263
+ this->check_nthreads();
264
+ if ((tree_num || per_tree_depths) && !this->check_can_predict_per_tree())
265
+ throw std::runtime_error("Cannot predict tree numbers/depths with this model.\n");
266
+ predict_iforest(
267
+ numeric_data, categ_data,
268
+ is_col_major, ld_numeric, ld_categ,
269
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
270
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
271
+ nrows, this->nthreads, standardize,
272
+ (!this->model.trees.empty())? &this->model : nullptr,
273
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
274
+ output_depths, tree_num, per_tree_depths,
275
+ (!this->indexer.indices.empty())? &this->indexer : nullptr);
276
+ }
277
+
278
+ void IsolationForest::predict(double X_sparse[], int X_ind[], int X_indptr[], bool is_csc,
279
+ int categ_data[], bool is_col_major, size_t ld_categ, size_t nrows, bool standardize,
280
+ double output_depths[], int tree_num[], double per_tree_depths[])
281
+ {
282
+ this->check_is_fitted();
283
+ this->check_nthreads();
284
+ if ((tree_num || per_tree_depths) && !this->check_can_predict_per_tree())
285
+ throw std::runtime_error("Cannot predict tree numbers/depths with this model.\n");
286
+ std::vector<double> out(nrows);
287
+ predict_iforest(
288
+ (double*)nullptr, categ_data,
289
+ is_col_major, (size_t)0, ld_categ,
290
+ is_csc? X_sparse : (double*)nullptr, is_csc? X_ind : (int*)nullptr, is_csc? X_indptr : (int*)nullptr,
291
+ is_csc? (double*)nullptr : X_sparse, is_csc? (int*)nullptr : X_ind, is_csc? (int*)nullptr : X_indptr,
292
+ nrows, this->nthreads, standardize,
293
+ (!this->model.trees.empty())? &this->model : nullptr,
294
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
295
+ output_depths, tree_num, per_tree_depths,
296
+ (!this->indexer.indices.empty())? &this->indexer : nullptr);
297
+ }
298
+
299
+ std::vector<double> IsolationForest::predict_distance(double X[], size_t nrows,
300
+ bool as_kernel,
301
+ bool assume_full_distr, bool standardize,
302
+ bool triangular)
303
+ {
304
+ this->check_is_fitted();
305
+ this->check_nthreads();
306
+ std::vector<double> tmat(calc_ncomb(nrows));
307
+ std::vector<double> dmat(triangular? square(nrows) : 0);
308
+
309
+ calc_similarity(X, (int*)nullptr,
310
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
311
+ nrows, false, this->nthreads, assume_full_distr, standardize, as_kernel,
312
+ (!this->model.trees.empty())? &this->model : nullptr,
313
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
314
+ tmat.data(), (double*)nullptr, (size_t)0, false,
315
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
316
+ true, (size_t)0, (size_t)0);
317
+ if (!triangular) {
318
+ double diag_filler;
319
+ if (as_kernel) {
320
+ if (standardize)
321
+ diag_filler = 1.;
322
+ else
323
+ diag_filler = std::max(this->model.trees.size(), this->model_ext.hplanes.size());
324
+ }
325
+ else {
326
+ if (standardize)
327
+ diag_filler = 0;
328
+ else
329
+ diag_filler = std::numeric_limits<double>::infinity();
330
+ }
331
+ tmat_to_dense(tmat.data(), dmat.data(), nrows, diag_filler);
332
+ }
333
+ return (triangular? tmat : dmat);
334
+ }
335
+
336
+ void IsolationForest::predict_distance(double numeric_data[], int categ_data[],
337
+ size_t nrows,
338
+ bool as_kernel,
339
+ bool assume_full_distr, bool standardize,
340
+ bool triangular,
341
+ double dist_matrix[])
342
+ {
343
+ this->check_is_fitted();
344
+ this->check_nthreads();
345
+ std::vector<double> tmat(triangular? 0 : calc_ncomb(nrows));
346
+
347
+ calc_similarity(numeric_data, categ_data,
348
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
349
+ nrows, false, this->nthreads, assume_full_distr, standardize, as_kernel,
350
+ (!this->model.trees.empty())? &this->model : nullptr,
351
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
352
+ triangular? dist_matrix : tmat.data(),
353
+ (double*)nullptr, (size_t)0, false,
354
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
355
+ true, (size_t)0, (size_t)0);
356
+ if (!triangular) {
357
+ double diag_filler;
358
+ if (as_kernel) {
359
+ if (standardize)
360
+ diag_filler = 1.;
361
+ else
362
+ diag_filler = std::max(this->model.trees.size(), this->model_ext.hplanes.size());
363
+ }
364
+ else {
365
+ if (standardize)
366
+ diag_filler = 0;
367
+ else
368
+ diag_filler = std::numeric_limits<double>::infinity();
369
+ }
370
+ tmat_to_dense(tmat.data(), dist_matrix, nrows, diag_filler);
371
+ }
372
+ }
373
+
374
+ void IsolationForest::predict_distance(double Xc[], int Xc_ind[], int Xc_indptr[], int categ_data[],
375
+ size_t nrows,
376
+ bool as_kernel,
377
+ bool assume_full_distr, bool standardize,
378
+ bool triangular,
379
+ double dist_matrix[])
380
+ {
381
+ this->check_is_fitted();
382
+ this->check_nthreads();
383
+ std::vector<double> tmat(triangular? 0 : calc_ncomb(nrows));
384
+
385
+ calc_similarity((double*)nullptr, (int*)nullptr,
386
+ Xc, Xc_ind, Xc_indptr,
387
+ nrows, false, this->nthreads, assume_full_distr, standardize, as_kernel,
388
+ (!this->model.trees.empty())? &this->model : nullptr,
389
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
390
+ triangular? dist_matrix : tmat.data(),
391
+ (double*)nullptr, (size_t)0, false,
392
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
393
+ true, (size_t)0, (size_t)0);
394
+ if (!triangular) {
395
+ double diag_filler;
396
+ if (as_kernel) {
397
+ if (standardize)
398
+ diag_filler = 1.;
399
+ else
400
+ diag_filler = std::max(this->model.trees.size(), this->model_ext.hplanes.size());
401
+ }
402
+ else {
403
+ if (standardize)
404
+ diag_filler = 0;
405
+ else
406
+ diag_filler = std::numeric_limits<double>::infinity();
407
+ }
408
+ tmat_to_dense(tmat.data(), dist_matrix, nrows, diag_filler);
409
+ }
410
+ }
411
+
412
+ void IsolationForest::impute(double X[], size_t nrows)
413
+ {
414
+ this->check_is_fitted();
415
+ this->check_nthreads();
416
+ if (this->imputer.imputer_tree.empty())
417
+ throw std::runtime_error("Model was built without imputation capabilities.\n");
418
+ impute_missing_values(X, (int*)nullptr, true,
419
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
420
+ nrows, false, this->nthreads,
421
+ (!this->model.trees.empty())? &this->model : nullptr,
422
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
423
+ this->imputer);
424
+ }
425
+
426
+ void IsolationForest::impute(double numeric_data[], int categ_data[], bool is_col_major, size_t nrows)
427
+ {
428
+ this->check_is_fitted();
429
+ if (this->imputer.imputer_tree.empty())
430
+ throw std::runtime_error("Model was built without imputation capabilities.\n");
431
+ this->check_nthreads();
432
+ impute_missing_values(numeric_data, categ_data, is_col_major,
433
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
434
+ nrows, false, this->nthreads,
435
+ (!this->model.trees.empty())? &this->model : nullptr,
436
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
437
+ this->imputer);
438
+ }
439
+
440
+ void IsolationForest::impute(double Xr[], int Xr_ind[], int Xr_indptr[],
441
+ int categ_data[], bool is_col_major, size_t nrows)
442
+ {
443
+ this->check_is_fitted();
444
+ if (this->imputer.imputer_tree.empty())
445
+ throw std::runtime_error("Model was built without imputation capabilities.\n");
446
+ this->check_nthreads();
447
+ impute_missing_values((double*)nullptr, categ_data, is_col_major,
448
+ Xr, Xr_ind, Xr_indptr,
449
+ nrows, false, this->nthreads,
450
+ (!this->model.trees.empty())? &this->model : nullptr,
451
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
452
+ this->imputer);
453
+ }
454
+
455
+ void IsolationForest::build_indexer(const bool with_distances)
456
+ {
457
+ this->check_is_fitted();
458
+ if (!this->indexer.indices.empty())
459
+ return;
460
+ if (this->missing_action == Divide)
461
+ throw std::runtime_error("Cannot build tree indexer when using 'missing_action=Divide'.\n");
462
+ if (!this->model.trees.empty() && this->new_cat_action == Weighted && this->cat_split_type == SubSet)
463
+ throw std::runtime_error("Cannot build tree indexer when using 'new_cat_action=Weighted' with single-variable model.\n");
464
+
465
+ if (!this->model.trees.empty())
466
+ build_tree_indices(this->indexer, this->model, this->nthreads, with_distances);
467
+ else if (!this->model_ext.hplanes.empty())
468
+ build_tree_indices(this->indexer, this->model_ext, this->nthreads, with_distances);
469
+ else
470
+ unexpected_error();
471
+ }
472
+
473
+ void IsolationForest::set_as_reference_points(double numeric_data[], int categ_data[], bool is_col_major,
474
+ size_t nrows, size_t ld_numeric, size_t ld_categ,
475
+ const bool with_distances)
476
+ {
477
+ this->check_is_fitted();
478
+ if (!this->model.trees.empty())
479
+ set_reference_points(&this->model, (ExtIsoForest*)NULL, &this->indexer,
480
+ with_distances,
481
+ numeric_data, categ_data,
482
+ is_col_major, ld_numeric, ld_categ,
483
+ (double*)NULL, (int*)NULL, (int*)NULL,
484
+ (double*)NULL, (int*)NULL, (int*)NULL,
485
+ nrows, this->nthreads);
486
+ else
487
+ set_reference_points((IsoForest*)NULL, &this->model_ext, &this->indexer,
488
+ with_distances,
489
+ numeric_data, categ_data,
490
+ is_col_major, ld_numeric, ld_categ,
491
+ (double*)NULL, (int*)NULL, (int*)NULL,
492
+ (double*)NULL, (int*)NULL, (int*)NULL,
493
+ nrows, this->nthreads);
494
+ }
495
+
496
+ void IsolationForest::set_as_reference_points(double Xc[], int Xc_ind[], int Xc_indptr[], int categ_data[],
497
+ size_t nrows, const bool with_distances)
498
+ {
499
+ this->check_is_fitted();
500
+ if (!this->model.trees.empty())
501
+ set_reference_points(&this->model, (ExtIsoForest*)NULL, &this->indexer,
502
+ with_distances,
503
+ (double*)NULL, (int*)NULL,
504
+ true, (size_t)0, (size_t)0,
505
+ Xc, Xc_ind, Xc_indptr,
506
+ (double*)NULL, (int*)NULL, (int*)NULL,
507
+ nrows, this->nthreads);
508
+ else
509
+ set_reference_points((IsoForest*)NULL, &this->model_ext, &this->indexer,
510
+ with_distances,
511
+ (double*)NULL, (int*)NULL,
512
+ true, (size_t)0, (size_t)0,
513
+ Xc, Xc_ind, Xc_indptr,
514
+ (double*)NULL, (int*)NULL, (int*)NULL,
515
+ nrows, this->nthreads);
516
+ }
517
+
518
+ size_t IsolationForest::get_num_reference_points() const noexcept
519
+ {
520
+ return get_number_of_reference_points(this->indexer);
521
+ }
522
+
523
+ void IsolationForest::predict_distance_to_ref_points(double numeric_data[], int categ_data[],
524
+ double Xc[], int Xc_ind[], int Xc_indptr[],
525
+ size_t nrows, bool is_col_major, size_t ld_numeric, size_t ld_categ,
526
+ bool as_kernel, bool standardize,
527
+ double dist_matrix[])
528
+ {
529
+ this->check_is_fitted();
530
+ if (this->indexer.indices.empty())
531
+ throw std::runtime_error("Model has no indexer. Cannot predict distances to indexer.\n");
532
+ if (!as_kernel && this->indexer.indices.front().node_distances.empty())
533
+ throw std::runtime_error("Model's indexer was built without distances. Cannot calculate distances to reference points.\n");
534
+ if (this->indexer.indices.front().reference_points.empty())
535
+ throw std::runtime_error("Model's indexer has no reference points. Cannot calculate distances to reference points.\n");
536
+ if (dist_matrix == NULL)
537
+ throw std::runtime_error("Passed a NULL pointer for 'dist_matrix'.\n");
538
+
539
+ calc_similarity(numeric_data, categ_data,
540
+ Xc, Xc_ind, Xc_indptr,
541
+ nrows, false, this->nthreads, true, standardize, as_kernel,
542
+ (!this->model.trees.empty())? &this->model : NULL,
543
+ (!this->model_ext.hplanes.empty())? &this->model_ext : NULL,
544
+ (double*)NULL, dist_matrix, (size_t)0, true,
545
+ &this->indexer, is_col_major, ld_numeric, ld_categ);
546
+ }
547
+
548
+ void IsolationForest::serialize(FILE *out) const
549
+ {
550
+ this->serialize_template(out);
551
+ }
552
+
553
+ void IsolationForest::serialize(std::ostream &out) const
554
+ {
555
+ this->serialize_template(out);
556
+ }
557
+
558
+ IsolationForest IsolationForest::deserialize(FILE *inp, int nthreads)
559
+ {
560
+ return deserialize_template(inp, nthreads);
561
+ }
562
+
563
+ IsolationForest IsolationForest::deserialize(std::istream &inp, int nthreads)
564
+ {
565
+ return deserialize_template(inp, nthreads);
566
+ }
567
+
568
+ std::ostream& operator<<(std::ostream &ost, const IsolationForest &model)
569
+ {
570
+ model.serialize(ost);
571
+ return ost;
572
+ }
573
+
574
+
575
+ std::ostream& isotree::operator<<(std::ostream &ost, const IsolationForest &model)
576
+ {
577
+ model.serialize(ost);
578
+ return ost;
579
+ }
580
+
581
+ std::istream& operator>>(std::istream &ist, IsolationForest &model)
582
+ {
583
+ model = IsolationForest::deserialize(ist, -1);
584
+ return ist;
585
+ }
586
+
587
+ std::istream& isotree::operator>>(std::istream &ist, IsolationForest &model)
588
+ {
589
+ model = IsolationForest::deserialize(ist, -1);
590
+ return ist;
591
+ }
592
+
593
+ IsoForest& IsolationForest::get_model()
594
+ {
595
+ if (this->ndim != 1)
596
+ throw std::runtime_error("Error: class contains an 'ExtIsoForest' model only.\n");
597
+ return this->model;
598
+ }
599
+
600
+ ExtIsoForest& IsolationForest::get_model_ext()
601
+ {
602
+ if (this->ndim == 1)
603
+ throw std::runtime_error("Error: class contains an 'IsoForest' model only.\n");
604
+ return this->model_ext;
605
+ }
606
+
607
+ Imputer& IsolationForest::get_imputer()
608
+ {
609
+ if (!this->build_imputer)
610
+ throw std::runtime_error("Error: model does not contain imputer.\n");
611
+ return this->imputer;
612
+ }
613
+
614
+ TreesIndexer& IsolationForest::get_indexer()
615
+ {
616
+ if (this->indexer.indices.empty() && (!this->model.trees.empty() || !this->model_ext.hplanes.empty()))
617
+ throw std::runtime_error("Error: model does not contain indexer.\n");
618
+ return this->indexer;
619
+ }
620
+
621
+ void IsolationForest::check_nthreads()
622
+ {
623
+ if (this->nthreads < 0) {
624
+ #ifdef _OPENMP
625
+ this->nthreads = omp_get_max_threads() + this->nthreads + 1;
626
+ #else
627
+ this->nthreads = 1;
628
+ #endif
629
+ }
630
+ if (nthreads <= 0) {
631
+ fprintf(stderr, "'isotree' got invalid 'nthreads', will set to 1.\n");
632
+ this->nthreads = 1;
633
+ }
634
+ #ifndef _OPENMP
635
+ else if (nthreads > 1) {
636
+ fprintf(stderr,
637
+ "Passed nthreads:%d to 'isotree', but library was compiled without multithreading.\n",
638
+ this->nthreads);
639
+ this->nthreads = 1;
640
+ }
641
+ #endif
642
+ }
643
+
644
+ size_t IsolationForest::get_ntrees() const
645
+ {
646
+ if (!this->model.trees.empty())
647
+ return this->model.trees.size();
648
+ else if (!this->model_ext.hplanes.empty())
649
+ return this->model_ext.hplanes.size();
650
+ else
651
+ throw std::runtime_error("Model is not fitted or is corrupted.\n");
652
+ }
653
+
654
+ bool IsolationForest::check_can_predict_per_tree() const
655
+ {
656
+ if (!this->model.trees.empty())
657
+ {
658
+ if (this->model.missing_action == Divide)
659
+ return false;
660
+ if (this->model.new_cat_action == Weighted && this->cat_split_type != SingleCateg)
661
+ {
662
+ for (const std::vector<IsoTree> &tree : this->model.trees)
663
+ for (const IsoTree &node : tree)
664
+ if (node.col_type == Categorical)
665
+ return false;
666
+ }
667
+ }
668
+
669
+ return true;
670
+ }
671
+
672
+ void IsolationForest::override_previous_fit()
673
+ {
674
+ if (this->is_fitted) {
675
+ this->model = IsoForest();
676
+ this->model_ext = ExtIsoForest();
677
+ this->imputer = Imputer();
678
+ this->indexer = TreesIndexer();
679
+ }
680
+ }
681
+
682
+ void IsolationForest::check_params()
683
+ {
684
+ this->check_nthreads();
685
+
686
+ if (this->prob_pick_by_gain_avg < 0) throw std::runtime_error("'prob_pick_by_gain_avg' must be >= 0.\n");
687
+ if (this->prob_pick_by_gain_pl < 0) throw std::runtime_error("'prob_pick_by_gain_pl' must be >= 0.\n");
688
+ if (this->prob_pick_by_full_gain < 0) throw std::runtime_error("'prob_pick_by_full_gain' must be >= 0.\n");
689
+ if (this->prob_pick_by_dens < 0) throw std::runtime_error("'prob_pick_by_dens' must be >= 0.\n");
690
+ if (this->prob_pick_col_by_range < 0) throw std::runtime_error("'prob_pick_col_by_range' must be >= 0.\n");
691
+ if (this->prob_pick_col_by_var < 0) throw std::runtime_error("'prob_pick_col_by_var' must be >= 0.\n");
692
+ if (this->prob_pick_col_by_kurt < 0) throw std::runtime_error("'prob_pick_col_by_kurt' must be >= 0.\n");
693
+
694
+ if (prob_pick_by_gain_avg + prob_pick_by_gain_pl + prob_pick_by_full_gain + prob_pick_by_dens
695
+ > 1. + 2. * std::numeric_limits<double>::epsilon())
696
+ throw std::runtime_error("Probabilities for gain-based splits sum to more than 1.\n");
697
+
698
+ if (prob_pick_col_by_var + prob_pick_col_by_var + prob_pick_col_by_kurt
699
+ > 1. + 2. * std::numeric_limits<double>::epsilon())
700
+ throw std::runtime_error("Probabilities for column choices sum to more than 1.\n");
701
+
702
+ if (min_gain < 0)
703
+ throw std::runtime_error("'min_gain' cannot be negative.\n");
704
+
705
+ if (this->ndim != 1) {
706
+ if (this->missing_action == Divide)
707
+ throw std::runtime_error("'missing_action' = 'Divide' not supported in extended model.\n");
708
+ }
709
+
710
+ if (this->coef_type != Uniform && this->coef_type != Normal)
711
+ throw std::runtime_error("Invalid 'coef_type'.\n");
712
+ if (this->missing_action != Divide && this->missing_action != Impute && this->missing_action != Fail)
713
+ throw std::runtime_error("Invalid 'missing_action'.\n");
714
+ if (this->cat_split_type != SubSet && this->cat_split_type != SingleCateg)
715
+ throw std::runtime_error("Invalid 'cat_split_type'.\n");
716
+ if (this->new_cat_action != Weighted && this->new_cat_action != Smallest && this->new_cat_action != Random)
717
+ throw std::runtime_error("Invalid 'new_cat_action'.\n");
718
+ if (this->depth_imp != Lower && this->depth_imp != Higher && this->depth_imp != Same)
719
+ throw std::runtime_error("Invalid 'depth_imp'.\n");
720
+ if (this->weigh_imp_rows != Inverse && this->weigh_imp_rows != Prop && this->weigh_imp_rows != Flat)
721
+ throw std::runtime_error("Invalid 'weigh_imp_rows'.\n");
722
+
723
+ if (this->sample_size > 0 && this->sample_size <= 2)
724
+ throw std::runtime_error("'sample_size' must be greater than 2.\n");
725
+
726
+ if (this->penalize_range && (this->scoring_metric == Density || this->scoring_metric == AdjDensity))
727
+ throw std::runtime_error("'penalize_range' is incompatible with density scoring.\n");
728
+ }
729
+
730
+ void IsolationForest::check_is_fitted() const
731
+ {
732
+ if (!this->is_fitted)
733
+ throw std::runtime_error("Model has not been fitted.\n");
734
+ }
735
+
736
+ template <class otype>
737
+ void IsolationForest::serialize_template(otype &out) const
738
+ {
739
+ this->check_is_fitted();
740
+
741
+ serialize_combined(
742
+ (!this->model.trees.empty())? &this->model : nullptr,
743
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
744
+ (!this->imputer.imputer_tree.empty())? &this->imputer : nullptr,
745
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
746
+ (char*)nullptr,
747
+ (size_t)0,
748
+ out
749
+ );
750
+ }
751
+
752
+ IsolationForest::IsolationForest(int nthreads, size_t ndim, size_t ntrees, bool build_imputer)
753
+ :
754
+ nthreads(nthreads),
755
+ ndim(ndim),
756
+ ntrees(ntrees),
757
+ build_imputer(build_imputer) {this->is_fitted = true;};
758
+
759
+ template <class itype>
760
+ IsolationForest IsolationForest::deserialize_template(itype &inp, int nthreads)
761
+ {
762
+ bool is_isotree_model = false;
763
+ bool is_compatible = false;
764
+ bool has_combined_objects = false;
765
+ bool has_IsoForest = false;
766
+ bool has_ExtIsoForest = false;
767
+ bool has_Imputer = false;
768
+ bool has_Indexer = false;
769
+ bool has_metadata = false;
770
+ size_t size_metadata = 0;
771
+ inspect_serialized_object(
772
+ inp,
773
+ is_isotree_model,
774
+ is_compatible,
775
+ has_combined_objects,
776
+ has_IsoForest,
777
+ has_ExtIsoForest,
778
+ has_Imputer,
779
+ has_Indexer,
780
+ has_metadata,
781
+ size_metadata
782
+ );
783
+ if (is_isotree_model && is_compatible && !has_combined_objects)
784
+ throw std::runtime_error("Serialized model is not compatible.\n");
785
+
786
+ IsoForest model = IsoForest();
787
+ ExtIsoForest model_ext = ExtIsoForest();
788
+ Imputer imputer = Imputer();
789
+ TreesIndexer indexer = TreesIndexer();
790
+
791
+ deserialize_combined(
792
+ inp,
793
+ &model,
794
+ &model_ext,
795
+ &imputer,
796
+ &indexer,
797
+ (char*)nullptr
798
+ );
799
+
800
+ if (model.trees.empty() && model_ext.hplanes.empty())
801
+ throw std::runtime_error("Error: model contains no trees.\n");
802
+
803
+ size_t ntrees;
804
+ size_t ndim = 3;
805
+ bool build_imputer = false;
806
+
807
+ if (!model.trees.empty()) {
808
+ ntrees = model.trees.size();
809
+ ndim = 1;
810
+ }
811
+ else {
812
+ ntrees = model_ext.hplanes.size();
813
+ }
814
+ if (!imputer.imputer_tree.empty()) {
815
+ if (imputer.imputer_tree.size() != ntrees)
816
+ throw std::runtime_error("Error: imputer has incorrect number of trees.\n");
817
+ build_imputer = true;
818
+ }
819
+ if (!indexer.indices.empty()) {
820
+ if (indexer.indices.size() != ntrees)
821
+ throw std::runtime_error("Error: indexer has incorrect number of trees.\n");
822
+ }
823
+
824
+ IsolationForest out = IsolationForest(nthreads, ndim, ntrees, build_imputer);
825
+
826
+ if (!model.trees.empty()) {
827
+ out.get_model() = std::move(model);
828
+ out.penalize_range = out.get_model().has_range_penalty;
829
+ }
830
+ else {
831
+ out.get_model_ext() = std::move(model_ext);
832
+ out.penalize_range = out.get_model_ext().has_range_penalty;
833
+ }
834
+ if (!imputer.imputer_tree.empty())
835
+ out.get_imputer() = std::move(imputer);
836
+ if (!indexer.indices.empty())
837
+ out.indexer = std::move(indexer);
838
+
839
+ return out;
840
+ }
841
+
842
+ #endif