isotree 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +8 -1
  3. data/LICENSE.txt +2 -2
  4. data/README.md +32 -14
  5. data/ext/isotree/ext.cpp +144 -31
  6. data/ext/isotree/extconf.rb +7 -7
  7. data/lib/isotree/isolation_forest.rb +110 -30
  8. data/lib/isotree/version.rb +1 -1
  9. data/vendor/isotree/LICENSE +1 -1
  10. data/vendor/isotree/README.md +165 -27
  11. data/vendor/isotree/include/isotree.hpp +2111 -0
  12. data/vendor/isotree/include/isotree_oop.hpp +394 -0
  13. data/vendor/isotree/inst/COPYRIGHTS +62 -0
  14. data/vendor/isotree/src/RcppExports.cpp +525 -52
  15. data/vendor/isotree/src/Rwrapper.cpp +1931 -268
  16. data/vendor/isotree/src/c_interface.cpp +953 -0
  17. data/vendor/isotree/src/crit.hpp +4232 -0
  18. data/vendor/isotree/src/dist.hpp +1886 -0
  19. data/vendor/isotree/src/exp_depth_table.hpp +134 -0
  20. data/vendor/isotree/src/extended.hpp +1444 -0
  21. data/vendor/isotree/src/external_facing_generic.hpp +399 -0
  22. data/vendor/isotree/src/fit_model.hpp +2401 -0
  23. data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
  24. data/vendor/isotree/src/helpers_iforest.hpp +813 -0
  25. data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
  26. data/vendor/isotree/src/indexer.cpp +515 -0
  27. data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
  28. data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
  29. data/vendor/isotree/src/isoforest.hpp +1659 -0
  30. data/vendor/isotree/src/isotree.hpp +1804 -392
  31. data/vendor/isotree/src/isotree_exportable.hpp +99 -0
  32. data/vendor/isotree/src/merge_models.cpp +159 -16
  33. data/vendor/isotree/src/mult.hpp +1321 -0
  34. data/vendor/isotree/src/oop_interface.cpp +842 -0
  35. data/vendor/isotree/src/oop_interface.hpp +278 -0
  36. data/vendor/isotree/src/other_helpers.hpp +219 -0
  37. data/vendor/isotree/src/predict.hpp +1932 -0
  38. data/vendor/isotree/src/python_helpers.hpp +134 -0
  39. data/vendor/isotree/src/ref_indexer.hpp +154 -0
  40. data/vendor/isotree/src/robinmap/LICENSE +21 -0
  41. data/vendor/isotree/src/robinmap/README.md +483 -0
  42. data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
  43. data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
  44. data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
  45. data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
  46. data/vendor/isotree/src/serialize.cpp +4300 -139
  47. data/vendor/isotree/src/sql.cpp +141 -59
  48. data/vendor/isotree/src/subset_models.cpp +174 -0
  49. data/vendor/isotree/src/utils.hpp +3808 -0
  50. data/vendor/isotree/src/xoshiro.hpp +467 -0
  51. data/vendor/isotree/src/ziggurat.hpp +405 -0
  52. metadata +38 -104
  53. data/vendor/cereal/LICENSE +0 -24
  54. data/vendor/cereal/README.md +0 -85
  55. data/vendor/cereal/include/cereal/access.hpp +0 -351
  56. data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
  57. data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
  58. data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
  59. data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
  60. data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
  61. data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
  62. data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
  63. data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
  64. data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
  65. data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
  66. data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
  67. data/vendor/cereal/include/cereal/details/util.hpp +0 -84
  68. data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
  69. data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
  70. data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
  71. data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
  72. data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
  73. data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
  74. data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
  75. data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
  76. data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
  77. data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
  78. data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
  79. data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
  80. data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
  81. data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
  82. data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
  83. data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
  84. data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
  85. data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
  86. data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
  87. data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
  88. data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
  89. data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
  90. data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
  91. data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
  92. data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
  93. data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
  94. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
  95. data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
  96. data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
  97. data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
  98. data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
  99. data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
  100. data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
  101. data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
  102. data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
  103. data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
  104. data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
  105. data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
  106. data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
  107. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
  108. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
  109. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
  110. data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
  111. data/vendor/cereal/include/cereal/macros.hpp +0 -154
  112. data/vendor/cereal/include/cereal/specialize.hpp +0 -139
  113. data/vendor/cereal/include/cereal/types/array.hpp +0 -79
  114. data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
  115. data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
  116. data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
  117. data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
  118. data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
  119. data/vendor/cereal/include/cereal/types/common.hpp +0 -129
  120. data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
  121. data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
  122. data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
  123. data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
  124. data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
  125. data/vendor/cereal/include/cereal/types/list.hpp +0 -62
  126. data/vendor/cereal/include/cereal/types/map.hpp +0 -36
  127. data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
  128. data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
  129. data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
  130. data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
  131. data/vendor/cereal/include/cereal/types/set.hpp +0 -103
  132. data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
  133. data/vendor/cereal/include/cereal/types/string.hpp +0 -61
  134. data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
  135. data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
  136. data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
  137. data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
  138. data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
  139. data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
  140. data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
  141. data/vendor/cereal/include/cereal/version.hpp +0 -52
  142. data/vendor/isotree/src/Makevars +0 -4
  143. data/vendor/isotree/src/crit.cpp +0 -912
  144. data/vendor/isotree/src/dist.cpp +0 -749
  145. data/vendor/isotree/src/extended.cpp +0 -790
  146. data/vendor/isotree/src/fit_model.cpp +0 -1090
  147. data/vendor/isotree/src/helpers_iforest.cpp +0 -324
  148. data/vendor/isotree/src/isoforest.cpp +0 -771
  149. data/vendor/isotree/src/mult.cpp +0 -607
  150. data/vendor/isotree/src/predict.cpp +0 -853
  151. data/vendor/isotree/src/utils.cpp +0 -1566
@@ -0,0 +1,842 @@
1
+ /* Isolation forests and variations thereof, with adjustments for incorporation
2
+ * of categorical variables and missing values.
3
+ * Written for C++11 standard and aimed at being used in R and Python.
4
+ *
5
+ * This library is based on the following works:
6
+ * [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
7
+ * "Isolation forest."
8
+ * 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008.
9
+ * [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
10
+ * "Isolation-based anomaly detection."
11
+ * ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3.
12
+ * [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner.
13
+ * "Extended Isolation Forest."
14
+ * arXiv preprint arXiv:1811.02141 (2018).
15
+ * [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
16
+ * "On detecting clustered anomalies using SCiForest."
17
+ * Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010.
18
+ * [5] https://sourceforge.net/projects/iforest/
19
+ * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
20
+ * [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
21
+ * [8] Cortes, David.
22
+ * "Distance approximation using Isolation Forests."
23
+ * arXiv preprint arXiv:1910.12362 (2019).
24
+ * [9] Cortes, David.
25
+ * "Imputing missing values with unsupervised random trees."
26
+ * arXiv preprint arXiv:1911.06646 (2019).
27
+ * [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
28
+ * [11] Cortes, David.
29
+ * "Revisiting randomized choices in isolation forests."
30
+ * arXiv preprint arXiv:2110.13402 (2021).
31
+ * [12] Guha, Sudipto, et al.
32
+ * "Robust random cut forest based anomaly detection on streams."
33
+ * International conference on machine learning. PMLR, 2016.
34
+ * [13] Cortes, David.
35
+ * "Isolation forests: looking beyond tree depth."
36
+ * arXiv preprint arXiv:2111.11639 (2021).
37
+ * [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
38
+ * "Isolation kernel and its effect on SVM"
39
+ * Proceedings of the 24th ACM SIGKDD
40
+ * International Conference on Knowledge Discovery & Data Mining. 2018.
41
+ *
42
+ * BSD 2-Clause License
43
+ * Copyright (c) 2019-2022, David Cortes
44
+ * All rights reserved.
45
+ * Redistribution and use in source and binary forms, with or without
46
+ * modification, are permitted provided that the following conditions are met:
47
+ * * Redistributions of source code must retain the above copyright notice, this
48
+ * list of conditions and the following disclaimer.
49
+ * * Redistributions in binary form must reproduce the above copyright notice,
50
+ * this list of conditions and the following disclaimer in the documentation
51
+ * and/or other materials provided with the distribution.
52
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
53
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
54
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
55
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
56
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
57
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
58
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
59
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
60
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62
+ */
63
+ #if !defined(_FOR_R) && !defined(_FOR_PYTHON)
64
+ #include "isotree.hpp"
65
+ #include "isotree_exportable.hpp"
66
+ #include "oop_interface.hpp"
67
+ using namespace isotree;
68
+
69
+ IsolationForest::IsolationForest
70
+ (
71
+ size_t ndim, size_t ntry, CoefType coef_type, bool coef_by_prop,
72
+ bool with_replacement, bool weight_as_sample,
73
+ size_t sample_size, size_t ntrees,
74
+ size_t max_depth, size_t ncols_per_tree, bool limit_depth,
75
+ bool penalize_range, bool standardize_data,
76
+ ScoringMetric scoring_metric, bool fast_bratio, bool weigh_by_kurt,
77
+ double prob_pick_by_gain_pl, double prob_pick_by_gain_avg,
78
+ double prob_pick_by_full_gain, double prob_pick_by_dens,
79
+ double prob_pick_col_by_range, double prob_pick_col_by_var,
80
+ double prob_pick_col_by_kurt,
81
+ double min_gain, MissingAction missing_action,
82
+ CategSplit cat_split_type, NewCategAction new_cat_action,
83
+ bool all_perm, bool build_imputer, size_t min_imp_obs,
84
+ UseDepthImp depth_imp, WeighImpRows weigh_imp_rows,
85
+ uint64_t random_seed, int nthreads
86
+ )
87
+ :
88
+ ndim(ndim),
89
+ ntry(ntry),
90
+ coef_type(coef_type),
91
+ coef_by_prop(coef_by_prop),
92
+ with_replacement(with_replacement),
93
+ weight_as_sample(weight_as_sample),
94
+ sample_size(sample_size),
95
+ ntrees(ntrees),
96
+ max_depth(max_depth),
97
+ ncols_per_tree(ncols_per_tree),
98
+ limit_depth(limit_depth),
99
+ penalize_range(penalize_range),
100
+ standardize_data(standardize_data),
101
+ scoring_metric(scoring_metric),
102
+ fast_bratio(fast_bratio),
103
+ weigh_by_kurt(weigh_by_kurt),
104
+ prob_pick_by_gain_pl(prob_pick_by_gain_pl),
105
+ prob_pick_by_gain_avg(prob_pick_by_gain_avg),
106
+ prob_pick_by_full_gain(prob_pick_by_full_gain),
107
+ prob_pick_by_dens(prob_pick_by_dens),
108
+ prob_pick_col_by_range(prob_pick_col_by_range),
109
+ prob_pick_col_by_var(prob_pick_col_by_var),
110
+ prob_pick_col_by_kurt(prob_pick_col_by_kurt),
111
+ min_gain(min_gain),
112
+ missing_action(missing_action),
113
+ cat_split_type(cat_split_type),
114
+ new_cat_action(new_cat_action),
115
+ all_perm(all_perm),
116
+ build_imputer(build_imputer),
117
+ min_imp_obs(min_imp_obs),
118
+ depth_imp(depth_imp),
119
+ weigh_imp_rows(weigh_imp_rows),
120
+ random_seed(random_seed)
121
+ {}
122
+
123
+
124
+ void IsolationForest::fit(double X[], size_t nrows, size_t ncols)
125
+ {
126
+ this->check_params();
127
+ this->override_previous_fit();
128
+
129
+ auto retcode = fit_iforest(
130
+ (this->ndim == 1)? &this->model : nullptr,
131
+ (this->ndim != 1)? &this->model_ext : nullptr,
132
+ X, ncols,
133
+ (int*)nullptr, (size_t)0, (int*)nullptr,
134
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
135
+ this->ndim, this->ntry, this->coef_type, this->coef_by_prop,
136
+ (double*)nullptr, this->with_replacement, this->weight_as_sample,
137
+ nrows, this->sample_size, this->ntrees,
138
+ this->max_depth, this->ncols_per_tree,
139
+ this->limit_depth, this->penalize_range, this->standardize_data,
140
+ this->scoring_metric, this->fast_bratio,
141
+ false, (double*)nullptr,
142
+ (double*)nullptr, true,
143
+ (double*)nullptr, this->weigh_by_kurt,
144
+ this->prob_pick_by_gain_pl,
145
+ this->prob_pick_by_gain_avg,
146
+ this->prob_pick_by_full_gain,
147
+ this->prob_pick_by_dens,
148
+ this->prob_pick_col_by_range,
149
+ this->prob_pick_col_by_var,
150
+ this->prob_pick_col_by_kurt,
151
+ this->min_gain, this->missing_action,
152
+ this->cat_split_type, this->new_cat_action,
153
+ this->all_perm, &this->imputer, this->min_imp_obs,
154
+ this->depth_imp, this->weigh_imp_rows, false,
155
+ this->random_seed, false, this->nthreads
156
+ );
157
+ if (retcode != EXIT_SUCCESS) unexpected_error();
158
+ this->is_fitted = true;
159
+ }
160
+
161
+ void IsolationForest::fit(double numeric_data[], size_t ncols_numeric, size_t nrows,
162
+ int categ_data[], size_t ncols_categ, int ncat[],
163
+ double sample_weights[], double col_weights[])
164
+ {
165
+ this->check_params();
166
+ this->override_previous_fit();
167
+
168
+ auto retcode = fit_iforest(
169
+ (this->ndim == 1)? &this->model : nullptr,
170
+ (this->ndim != 1)? &this->model_ext : nullptr,
171
+ numeric_data, ncols_numeric,
172
+ categ_data, ncols_categ, ncat,
173
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
174
+ this->ndim, this->ntry, this->coef_type, this->coef_by_prop,
175
+ sample_weights, this->with_replacement, this->weight_as_sample,
176
+ nrows, this->sample_size, this->ntrees,
177
+ this->max_depth, this->ncols_per_tree,
178
+ this->limit_depth, this->penalize_range, this->standardize_data,
179
+ this->scoring_metric, this->fast_bratio,
180
+ false, (double*)nullptr,
181
+ (double*)nullptr, true,
182
+ col_weights, this->weigh_by_kurt,
183
+ this->prob_pick_by_gain_pl,
184
+ this->prob_pick_by_gain_avg,
185
+ this->prob_pick_by_full_gain,
186
+ this->prob_pick_by_dens,
187
+ this->prob_pick_col_by_range,
188
+ this->prob_pick_col_by_var,
189
+ this->prob_pick_col_by_kurt,
190
+ this->min_gain, this->missing_action,
191
+ this->cat_split_type, this->new_cat_action,
192
+ this->all_perm, &this->imputer, this->min_imp_obs,
193
+ this->depth_imp, this->weigh_imp_rows, false,
194
+ this->random_seed, false, this->nthreads
195
+ );
196
+ if (retcode != EXIT_SUCCESS) unexpected_error();
197
+ this->is_fitted = true;
198
+ }
199
+
200
+ void IsolationForest::fit(double Xc[], int Xc_ind[], int Xc_indptr[],
201
+ size_t ncols_numeric, size_t nrows,
202
+ int categ_data[], size_t ncols_categ, int ncat[],
203
+ double sample_weights[], double col_weights[])
204
+ {
205
+ this->check_params();
206
+ this->override_previous_fit();
207
+
208
+ auto retcode = fit_iforest(
209
+ (this->ndim == 1)? &this->model : nullptr,
210
+ (this->ndim != 1)? &this->model_ext : nullptr,
211
+ (double*)nullptr, ncols_numeric,
212
+ categ_data, ncols_categ, ncat,
213
+ Xc, Xc_ind, Xc_indptr,
214
+ this->ndim, this->ntry, this->coef_type, this->coef_by_prop,
215
+ sample_weights, this->with_replacement, this->weight_as_sample,
216
+ nrows, this->sample_size, this->ntrees,
217
+ this->max_depth, this->ncols_per_tree,
218
+ this->limit_depth, this->penalize_range, this->standardize_data,
219
+ this->scoring_metric, this->fast_bratio,
220
+ false, (double*)nullptr,
221
+ (double*)nullptr, true,
222
+ col_weights, this->weigh_by_kurt,
223
+ this->prob_pick_by_gain_pl,
224
+ this->prob_pick_by_gain_avg,
225
+ this->prob_pick_by_full_gain,
226
+ this->prob_pick_by_dens,
227
+ this->prob_pick_col_by_range,
228
+ this->prob_pick_col_by_var,
229
+ this->prob_pick_col_by_kurt,
230
+ this->min_gain, this->missing_action,
231
+ this->cat_split_type, this->new_cat_action,
232
+ this->all_perm, &this->imputer, this->min_imp_obs,
233
+ this->depth_imp, this->weigh_imp_rows, false,
234
+ this->random_seed, false, this->nthreads
235
+ );
236
+ if (retcode != EXIT_SUCCESS) unexpected_error();
237
+ this->is_fitted = true;
238
+ }
239
+
240
+ std::vector<double> IsolationForest::predict(double X[], size_t nrows, bool standardize)
241
+ {
242
+ this->check_is_fitted();
243
+ this->check_nthreads();
244
+ std::vector<double> out(nrows);
245
+ predict_iforest(
246
+ X, (int*)nullptr,
247
+ true, (size_t)0, (size_t)0,
248
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
249
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
250
+ nrows, this->nthreads, standardize,
251
+ (!this->model.trees.empty())? &this->model : nullptr,
252
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
253
+ out.data(), (int*)nullptr, (double*)nullptr,
254
+ (TreesIndexer*)nullptr);
255
+ return out;
256
+ }
257
+
258
+ void IsolationForest::predict(double numeric_data[], int categ_data[], bool is_col_major,
259
+ size_t nrows, size_t ld_numeric, size_t ld_categ, bool standardize,
260
+ double output_depths[], int tree_num[], double per_tree_depths[])
261
+ {
262
+ this->check_is_fitted();
263
+ this->check_nthreads();
264
+ if ((tree_num || per_tree_depths) && !this->check_can_predict_per_tree())
265
+ throw std::runtime_error("Cannot predict tree numbers/depths with this model.\n");
266
+ predict_iforest(
267
+ numeric_data, categ_data,
268
+ is_col_major, ld_numeric, ld_categ,
269
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
270
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
271
+ nrows, this->nthreads, standardize,
272
+ (!this->model.trees.empty())? &this->model : nullptr,
273
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
274
+ output_depths, tree_num, per_tree_depths,
275
+ (!this->indexer.indices.empty())? &this->indexer : nullptr);
276
+ }
277
+
278
+ void IsolationForest::predict(double X_sparse[], int X_ind[], int X_indptr[], bool is_csc,
279
+ int categ_data[], bool is_col_major, size_t ld_categ, size_t nrows, bool standardize,
280
+ double output_depths[], int tree_num[], double per_tree_depths[])
281
+ {
282
+ this->check_is_fitted();
283
+ this->check_nthreads();
284
+ if ((tree_num || per_tree_depths) && !this->check_can_predict_per_tree())
285
+ throw std::runtime_error("Cannot predict tree numbers/depths with this model.\n");
286
+ std::vector<double> out(nrows);
287
+ predict_iforest(
288
+ (double*)nullptr, categ_data,
289
+ is_col_major, (size_t)0, ld_categ,
290
+ is_csc? X_sparse : (double*)nullptr, is_csc? X_ind : (int*)nullptr, is_csc? X_indptr : (int*)nullptr,
291
+ is_csc? (double*)nullptr : X_sparse, is_csc? (int*)nullptr : X_ind, is_csc? (int*)nullptr : X_indptr,
292
+ nrows, this->nthreads, standardize,
293
+ (!this->model.trees.empty())? &this->model : nullptr,
294
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
295
+ output_depths, tree_num, per_tree_depths,
296
+ (!this->indexer.indices.empty())? &this->indexer : nullptr);
297
+ }
298
+
299
+ std::vector<double> IsolationForest::predict_distance(double X[], size_t nrows,
300
+ bool as_kernel,
301
+ bool assume_full_distr, bool standardize,
302
+ bool triangular)
303
+ {
304
+ this->check_is_fitted();
305
+ this->check_nthreads();
306
+ std::vector<double> tmat(calc_ncomb(nrows));
307
+ std::vector<double> dmat(triangular? square(nrows) : 0);
308
+
309
+ calc_similarity(X, (int*)nullptr,
310
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
311
+ nrows, false, this->nthreads, assume_full_distr, standardize, as_kernel,
312
+ (!this->model.trees.empty())? &this->model : nullptr,
313
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
314
+ tmat.data(), (double*)nullptr, (size_t)0, false,
315
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
316
+ true, (size_t)0, (size_t)0);
317
+ if (!triangular) {
318
+ double diag_filler;
319
+ if (as_kernel) {
320
+ if (standardize)
321
+ diag_filler = 1.;
322
+ else
323
+ diag_filler = std::max(this->model.trees.size(), this->model_ext.hplanes.size());
324
+ }
325
+ else {
326
+ if (standardize)
327
+ diag_filler = 0;
328
+ else
329
+ diag_filler = std::numeric_limits<double>::infinity();
330
+ }
331
+ tmat_to_dense(tmat.data(), dmat.data(), nrows, diag_filler);
332
+ }
333
+ return (triangular? tmat : dmat);
334
+ }
335
+
336
+ void IsolationForest::predict_distance(double numeric_data[], int categ_data[],
337
+ size_t nrows,
338
+ bool as_kernel,
339
+ bool assume_full_distr, bool standardize,
340
+ bool triangular,
341
+ double dist_matrix[])
342
+ {
343
+ this->check_is_fitted();
344
+ this->check_nthreads();
345
+ std::vector<double> tmat(triangular? 0 : calc_ncomb(nrows));
346
+
347
+ calc_similarity(numeric_data, categ_data,
348
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
349
+ nrows, false, this->nthreads, assume_full_distr, standardize, as_kernel,
350
+ (!this->model.trees.empty())? &this->model : nullptr,
351
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
352
+ triangular? dist_matrix : tmat.data(),
353
+ (double*)nullptr, (size_t)0, false,
354
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
355
+ true, (size_t)0, (size_t)0);
356
+ if (!triangular) {
357
+ double diag_filler;
358
+ if (as_kernel) {
359
+ if (standardize)
360
+ diag_filler = 1.;
361
+ else
362
+ diag_filler = std::max(this->model.trees.size(), this->model_ext.hplanes.size());
363
+ }
364
+ else {
365
+ if (standardize)
366
+ diag_filler = 0;
367
+ else
368
+ diag_filler = std::numeric_limits<double>::infinity();
369
+ }
370
+ tmat_to_dense(tmat.data(), dist_matrix, nrows, diag_filler);
371
+ }
372
+ }
373
+
374
+ void IsolationForest::predict_distance(double Xc[], int Xc_ind[], int Xc_indptr[], int categ_data[],
375
+ size_t nrows,
376
+ bool as_kernel,
377
+ bool assume_full_distr, bool standardize,
378
+ bool triangular,
379
+ double dist_matrix[])
380
+ {
381
+ this->check_is_fitted();
382
+ this->check_nthreads();
383
+ std::vector<double> tmat(triangular? 0 : calc_ncomb(nrows));
384
+
385
+ calc_similarity((double*)nullptr, (int*)nullptr,
386
+ Xc, Xc_ind, Xc_indptr,
387
+ nrows, false, this->nthreads, assume_full_distr, standardize, as_kernel,
388
+ (!this->model.trees.empty())? &this->model : nullptr,
389
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
390
+ triangular? dist_matrix : tmat.data(),
391
+ (double*)nullptr, (size_t)0, false,
392
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
393
+ true, (size_t)0, (size_t)0);
394
+ if (!triangular) {
395
+ double diag_filler;
396
+ if (as_kernel) {
397
+ if (standardize)
398
+ diag_filler = 1.;
399
+ else
400
+ diag_filler = std::max(this->model.trees.size(), this->model_ext.hplanes.size());
401
+ }
402
+ else {
403
+ if (standardize)
404
+ diag_filler = 0;
405
+ else
406
+ diag_filler = std::numeric_limits<double>::infinity();
407
+ }
408
+ tmat_to_dense(tmat.data(), dist_matrix, nrows, diag_filler);
409
+ }
410
+ }
411
+
412
+ void IsolationForest::impute(double X[], size_t nrows)
413
+ {
414
+ this->check_is_fitted();
415
+ this->check_nthreads();
416
+ if (this->imputer.imputer_tree.empty())
417
+ throw std::runtime_error("Model was built without imputation capabilities.\n");
418
+ impute_missing_values(X, (int*)nullptr, true,
419
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
420
+ nrows, false, this->nthreads,
421
+ (!this->model.trees.empty())? &this->model : nullptr,
422
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
423
+ this->imputer);
424
+ }
425
+
426
+ void IsolationForest::impute(double numeric_data[], int categ_data[], bool is_col_major, size_t nrows)
427
+ {
428
+ this->check_is_fitted();
429
+ if (this->imputer.imputer_tree.empty())
430
+ throw std::runtime_error("Model was built without imputation capabilities.\n");
431
+ this->check_nthreads();
432
+ impute_missing_values(numeric_data, categ_data, is_col_major,
433
+ (double*)nullptr, (int*)nullptr, (int*)nullptr,
434
+ nrows, false, this->nthreads,
435
+ (!this->model.trees.empty())? &this->model : nullptr,
436
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
437
+ this->imputer);
438
+ }
439
+
440
+ void IsolationForest::impute(double Xr[], int Xr_ind[], int Xr_indptr[],
441
+ int categ_data[], bool is_col_major, size_t nrows)
442
+ {
443
+ this->check_is_fitted();
444
+ if (this->imputer.imputer_tree.empty())
445
+ throw std::runtime_error("Model was built without imputation capabilities.\n");
446
+ this->check_nthreads();
447
+ impute_missing_values((double*)nullptr, categ_data, is_col_major,
448
+ Xr, Xr_ind, Xr_indptr,
449
+ nrows, false, this->nthreads,
450
+ (!this->model.trees.empty())? &this->model : nullptr,
451
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
452
+ this->imputer);
453
+ }
454
+
455
+ void IsolationForest::build_indexer(const bool with_distances)
456
+ {
457
+ this->check_is_fitted();
458
+ if (!this->indexer.indices.empty())
459
+ return;
460
+ if (this->missing_action == Divide)
461
+ throw std::runtime_error("Cannot build tree indexer when using 'missing_action=Divide'.\n");
462
+ if (!this->model.trees.empty() && this->new_cat_action == Weighted && this->cat_split_type == SubSet)
463
+ throw std::runtime_error("Cannot build tree indexer when using 'new_cat_action=Weighted' with single-variable model.\n");
464
+
465
+ if (!this->model.trees.empty())
466
+ build_tree_indices(this->indexer, this->model, this->nthreads, with_distances);
467
+ else if (!this->model_ext.hplanes.empty())
468
+ build_tree_indices(this->indexer, this->model_ext, this->nthreads, with_distances);
469
+ else
470
+ unexpected_error();
471
+ }
472
+
473
+ void IsolationForest::set_as_reference_points(double numeric_data[], int categ_data[], bool is_col_major,
474
+ size_t nrows, size_t ld_numeric, size_t ld_categ,
475
+ const bool with_distances)
476
+ {
477
+ this->check_is_fitted();
478
+ if (!this->model.trees.empty())
479
+ set_reference_points(&this->model, (ExtIsoForest*)NULL, &this->indexer,
480
+ with_distances,
481
+ numeric_data, categ_data,
482
+ is_col_major, ld_numeric, ld_categ,
483
+ (double*)NULL, (int*)NULL, (int*)NULL,
484
+ (double*)NULL, (int*)NULL, (int*)NULL,
485
+ nrows, this->nthreads);
486
+ else
487
+ set_reference_points((IsoForest*)NULL, &this->model_ext, &this->indexer,
488
+ with_distances,
489
+ numeric_data, categ_data,
490
+ is_col_major, ld_numeric, ld_categ,
491
+ (double*)NULL, (int*)NULL, (int*)NULL,
492
+ (double*)NULL, (int*)NULL, (int*)NULL,
493
+ nrows, this->nthreads);
494
+ }
495
+
496
+ void IsolationForest::set_as_reference_points(double Xc[], int Xc_ind[], int Xc_indptr[], int categ_data[],
497
+ size_t nrows, const bool with_distances)
498
+ {
499
+ this->check_is_fitted();
500
+ if (!this->model.trees.empty())
501
+ set_reference_points(&this->model, (ExtIsoForest*)NULL, &this->indexer,
502
+ with_distances,
503
+ (double*)NULL, (int*)NULL,
504
+ true, (size_t)0, (size_t)0,
505
+ Xc, Xc_ind, Xc_indptr,
506
+ (double*)NULL, (int*)NULL, (int*)NULL,
507
+ nrows, this->nthreads);
508
+ else
509
+ set_reference_points((IsoForest*)NULL, &this->model_ext, &this->indexer,
510
+ with_distances,
511
+ (double*)NULL, (int*)NULL,
512
+ true, (size_t)0, (size_t)0,
513
+ Xc, Xc_ind, Xc_indptr,
514
+ (double*)NULL, (int*)NULL, (int*)NULL,
515
+ nrows, this->nthreads);
516
+ }
517
+
518
+ size_t IsolationForest::get_num_reference_points() const noexcept
519
+ {
520
+ return get_number_of_reference_points(this->indexer);
521
+ }
522
+
523
+ void IsolationForest::predict_distance_to_ref_points(double numeric_data[], int categ_data[],
524
+ double Xc[], int Xc_ind[], int Xc_indptr[],
525
+ size_t nrows, bool is_col_major, size_t ld_numeric, size_t ld_categ,
526
+ bool as_kernel, bool standardize,
527
+ double dist_matrix[])
528
+ {
529
+ this->check_is_fitted();
530
+ if (this->indexer.indices.empty())
531
+ throw std::runtime_error("Model has no indexer. Cannot predict distances to indexer.\n");
532
+ if (!as_kernel && this->indexer.indices.front().node_distances.empty())
533
+ throw std::runtime_error("Model's indexer was built without distances. Cannot calculate distances to reference points.\n");
534
+ if (this->indexer.indices.front().reference_points.empty())
535
+ throw std::runtime_error("Model's indexer has no reference points. Cannot calculate distances to reference points.\n");
536
+ if (dist_matrix == NULL)
537
+ throw std::runtime_error("Passed a NULL pointer for 'dist_matrix'.\n");
538
+
539
+ calc_similarity(numeric_data, categ_data,
540
+ Xc, Xc_ind, Xc_indptr,
541
+ nrows, false, this->nthreads, true, standardize, as_kernel,
542
+ (!this->model.trees.empty())? &this->model : NULL,
543
+ (!this->model_ext.hplanes.empty())? &this->model_ext : NULL,
544
+ (double*)NULL, dist_matrix, (size_t)0, true,
545
+ &this->indexer, is_col_major, ld_numeric, ld_categ);
546
+ }
547
+
548
+ void IsolationForest::serialize(FILE *out) const
549
+ {
550
+ this->serialize_template(out);
551
+ }
552
+
553
+ void IsolationForest::serialize(std::ostream &out) const
554
+ {
555
+ this->serialize_template(out);
556
+ }
557
+
558
+ IsolationForest IsolationForest::deserialize(FILE *inp, int nthreads)
559
+ {
560
+ return deserialize_template(inp, nthreads);
561
+ }
562
+
563
+ IsolationForest IsolationForest::deserialize(std::istream &inp, int nthreads)
564
+ {
565
+ return deserialize_template(inp, nthreads);
566
+ }
567
+
568
+ std::ostream& operator<<(std::ostream &ost, const IsolationForest &model)
569
+ {
570
+ model.serialize(ost);
571
+ return ost;
572
+ }
573
+
574
+
575
+ std::ostream& isotree::operator<<(std::ostream &ost, const IsolationForest &model)
576
+ {
577
+ model.serialize(ost);
578
+ return ost;
579
+ }
580
+
581
+ std::istream& operator>>(std::istream &ist, IsolationForest &model)
582
+ {
583
+ model = IsolationForest::deserialize(ist, -1);
584
+ return ist;
585
+ }
586
+
587
+ std::istream& isotree::operator>>(std::istream &ist, IsolationForest &model)
588
+ {
589
+ model = IsolationForest::deserialize(ist, -1);
590
+ return ist;
591
+ }
592
+
593
+ IsoForest& IsolationForest::get_model()
594
+ {
595
+ if (this->ndim != 1)
596
+ throw std::runtime_error("Error: class contains an 'ExtIsoForest' model only.\n");
597
+ return this->model;
598
+ }
599
+
600
+ ExtIsoForest& IsolationForest::get_model_ext()
601
+ {
602
+ if (this->ndim == 1)
603
+ throw std::runtime_error("Error: class contains an 'IsoForest' model only.\n");
604
+ return this->model_ext;
605
+ }
606
+
607
+ Imputer& IsolationForest::get_imputer()
608
+ {
609
+ if (!this->build_imputer)
610
+ throw std::runtime_error("Error: model does not contain imputer.\n");
611
+ return this->imputer;
612
+ }
613
+
614
+ TreesIndexer& IsolationForest::get_indexer()
615
+ {
616
+ if (this->indexer.indices.empty() && (!this->model.trees.empty() || !this->model_ext.hplanes.empty()))
617
+ throw std::runtime_error("Error: model does not contain indexer.\n");
618
+ return this->indexer;
619
+ }
620
+
621
+ void IsolationForest::check_nthreads()
622
+ {
623
+ if (this->nthreads < 0) {
624
+ #ifdef _OPENMP
625
+ this->nthreads = omp_get_max_threads() + this->nthreads + 1;
626
+ #else
627
+ this->nthreads = 1;
628
+ #endif
629
+ }
630
+ if (nthreads <= 0) {
631
+ fprintf(stderr, "'isotree' got invalid 'nthreads', will set to 1.\n");
632
+ this->nthreads = 1;
633
+ }
634
+ #ifndef _OPENMP
635
+ else if (nthreads > 1) {
636
+ fprintf(stderr,
637
+ "Passed nthreads:%d to 'isotree', but library was compiled without multithreading.\n",
638
+ this->nthreads);
639
+ this->nthreads = 1;
640
+ }
641
+ #endif
642
+ }
643
+
644
+ size_t IsolationForest::get_ntrees() const
645
+ {
646
+ if (!this->model.trees.empty())
647
+ return this->model.trees.size();
648
+ else if (!this->model_ext.hplanes.empty())
649
+ return this->model_ext.hplanes.size();
650
+ else
651
+ throw std::runtime_error("Model is not fitted or is corrupted.\n");
652
+ }
653
+
654
+ bool IsolationForest::check_can_predict_per_tree() const
655
+ {
656
+ if (!this->model.trees.empty())
657
+ {
658
+ if (this->model.missing_action == Divide)
659
+ return false;
660
+ if (this->model.new_cat_action == Weighted && this->cat_split_type != SingleCateg)
661
+ {
662
+ for (const std::vector<IsoTree> &tree : this->model.trees)
663
+ for (const IsoTree &node : tree)
664
+ if (node.col_type == Categorical)
665
+ return false;
666
+ }
667
+ }
668
+
669
+ return true;
670
+ }
671
+
672
+ void IsolationForest::override_previous_fit()
673
+ {
674
+ if (this->is_fitted) {
675
+ this->model = IsoForest();
676
+ this->model_ext = ExtIsoForest();
677
+ this->imputer = Imputer();
678
+ this->indexer = TreesIndexer();
679
+ }
680
+ }
681
+
682
+ void IsolationForest::check_params()
683
+ {
684
+ this->check_nthreads();
685
+
686
+ if (this->prob_pick_by_gain_avg < 0) throw std::runtime_error("'prob_pick_by_gain_avg' must be >= 0.\n");
687
+ if (this->prob_pick_by_gain_pl < 0) throw std::runtime_error("'prob_pick_by_gain_pl' must be >= 0.\n");
688
+ if (this->prob_pick_by_full_gain < 0) throw std::runtime_error("'prob_pick_by_full_gain' must be >= 0.\n");
689
+ if (this->prob_pick_by_dens < 0) throw std::runtime_error("'prob_pick_by_dens' must be >= 0.\n");
690
+ if (this->prob_pick_col_by_range < 0) throw std::runtime_error("'prob_pick_col_by_range' must be >= 0.\n");
691
+ if (this->prob_pick_col_by_var < 0) throw std::runtime_error("'prob_pick_col_by_var' must be >= 0.\n");
692
+ if (this->prob_pick_col_by_kurt < 0) throw std::runtime_error("'prob_pick_col_by_kurt' must be >= 0.\n");
693
+
694
+ if (prob_pick_by_gain_avg + prob_pick_by_gain_pl + prob_pick_by_full_gain + prob_pick_by_dens
695
+ > 1. + 2. * std::numeric_limits<double>::epsilon())
696
+ throw std::runtime_error("Probabilities for gain-based splits sum to more than 1.\n");
697
+
698
+ if (prob_pick_col_by_var + prob_pick_col_by_var + prob_pick_col_by_kurt
699
+ > 1. + 2. * std::numeric_limits<double>::epsilon())
700
+ throw std::runtime_error("Probabilities for column choices sum to more than 1.\n");
701
+
702
+ if (min_gain < 0)
703
+ throw std::runtime_error("'min_gain' cannot be negative.\n");
704
+
705
+ if (this->ndim != 1) {
706
+ if (this->missing_action == Divide)
707
+ throw std::runtime_error("'missing_action' = 'Divide' not supported in extended model.\n");
708
+ }
709
+
710
+ if (this->coef_type != Uniform && this->coef_type != Normal)
711
+ throw std::runtime_error("Invalid 'coef_type'.\n");
712
+ if (this->missing_action != Divide && this->missing_action != Impute && this->missing_action != Fail)
713
+ throw std::runtime_error("Invalid 'missing_action'.\n");
714
+ if (this->cat_split_type != SubSet && this->cat_split_type != SingleCateg)
715
+ throw std::runtime_error("Invalid 'cat_split_type'.\n");
716
+ if (this->new_cat_action != Weighted && this->new_cat_action != Smallest && this->new_cat_action != Random)
717
+ throw std::runtime_error("Invalid 'new_cat_action'.\n");
718
+ if (this->depth_imp != Lower && this->depth_imp != Higher && this->depth_imp != Same)
719
+ throw std::runtime_error("Invalid 'depth_imp'.\n");
720
+ if (this->weigh_imp_rows != Inverse && this->weigh_imp_rows != Prop && this->weigh_imp_rows != Flat)
721
+ throw std::runtime_error("Invalid 'weigh_imp_rows'.\n");
722
+
723
+ if (this->sample_size > 0 && this->sample_size <= 2)
724
+ throw std::runtime_error("'sample_size' must be greater than 2.\n");
725
+
726
+ if (this->penalize_range && (this->scoring_metric == Density || this->scoring_metric == AdjDensity))
727
+ throw std::runtime_error("'penalize_range' is incompatible with density scoring.\n");
728
+ }
729
+
730
+ void IsolationForest::check_is_fitted() const
731
+ {
732
+ if (!this->is_fitted)
733
+ throw std::runtime_error("Model has not been fitted.\n");
734
+ }
735
+
736
+ template <class otype>
737
+ void IsolationForest::serialize_template(otype &out) const
738
+ {
739
+ this->check_is_fitted();
740
+
741
+ serialize_combined(
742
+ (!this->model.trees.empty())? &this->model : nullptr,
743
+ (!this->model_ext.hplanes.empty())? &this->model_ext : nullptr,
744
+ (!this->imputer.imputer_tree.empty())? &this->imputer : nullptr,
745
+ (!this->indexer.indices.empty())? &this->indexer : nullptr,
746
+ (char*)nullptr,
747
+ (size_t)0,
748
+ out
749
+ );
750
+ }
751
+
752
+ IsolationForest::IsolationForest(int nthreads, size_t ndim, size_t ntrees, bool build_imputer)
753
+ :
754
+ nthreads(nthreads),
755
+ ndim(ndim),
756
+ ntrees(ntrees),
757
+ build_imputer(build_imputer) {this->is_fitted = true;};
758
+
759
+ template <class itype>
760
+ IsolationForest IsolationForest::deserialize_template(itype &inp, int nthreads)
761
+ {
762
+ bool is_isotree_model = false;
763
+ bool is_compatible = false;
764
+ bool has_combined_objects = false;
765
+ bool has_IsoForest = false;
766
+ bool has_ExtIsoForest = false;
767
+ bool has_Imputer = false;
768
+ bool has_Indexer = false;
769
+ bool has_metadata = false;
770
+ size_t size_metadata = 0;
771
+ inspect_serialized_object(
772
+ inp,
773
+ is_isotree_model,
774
+ is_compatible,
775
+ has_combined_objects,
776
+ has_IsoForest,
777
+ has_ExtIsoForest,
778
+ has_Imputer,
779
+ has_Indexer,
780
+ has_metadata,
781
+ size_metadata
782
+ );
783
+ if (is_isotree_model && is_compatible && !has_combined_objects)
784
+ throw std::runtime_error("Serialized model is not compatible.\n");
785
+
786
+ IsoForest model = IsoForest();
787
+ ExtIsoForest model_ext = ExtIsoForest();
788
+ Imputer imputer = Imputer();
789
+ TreesIndexer indexer = TreesIndexer();
790
+
791
+ deserialize_combined(
792
+ inp,
793
+ &model,
794
+ &model_ext,
795
+ &imputer,
796
+ &indexer,
797
+ (char*)nullptr
798
+ );
799
+
800
+ if (model.trees.empty() && model_ext.hplanes.empty())
801
+ throw std::runtime_error("Error: model contains no trees.\n");
802
+
803
+ size_t ntrees;
804
+ size_t ndim = 3;
805
+ bool build_imputer = false;
806
+
807
+ if (!model.trees.empty()) {
808
+ ntrees = model.trees.size();
809
+ ndim = 1;
810
+ }
811
+ else {
812
+ ntrees = model_ext.hplanes.size();
813
+ }
814
+ if (!imputer.imputer_tree.empty()) {
815
+ if (imputer.imputer_tree.size() != ntrees)
816
+ throw std::runtime_error("Error: imputer has incorrect number of trees.\n");
817
+ build_imputer = true;
818
+ }
819
+ if (!indexer.indices.empty()) {
820
+ if (indexer.indices.size() != ntrees)
821
+ throw std::runtime_error("Error: indexer has incorrect number of trees.\n");
822
+ }
823
+
824
+ IsolationForest out = IsolationForest(nthreads, ndim, ntrees, build_imputer);
825
+
826
+ if (!model.trees.empty()) {
827
+ out.get_model() = std::move(model);
828
+ out.penalize_range = out.get_model().has_range_penalty;
829
+ }
830
+ else {
831
+ out.get_model_ext() = std::move(model_ext);
832
+ out.penalize_range = out.get_model_ext().has_range_penalty;
833
+ }
834
+ if (!imputer.imputer_tree.empty())
835
+ out.get_imputer() = std::move(imputer);
836
+ if (!indexer.indices.empty())
837
+ out.indexer = std::move(indexer);
838
+
839
+ return out;
840
+ }
841
+
842
+ #endif