isotree 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/LICENSE.txt +2 -2
- data/README.md +32 -14
- data/ext/isotree/ext.cpp +144 -31
- data/ext/isotree/extconf.rb +7 -7
- data/lib/isotree/isolation_forest.rb +110 -30
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +165 -27
- data/vendor/isotree/include/isotree.hpp +2111 -0
- data/vendor/isotree/include/isotree_oop.hpp +394 -0
- data/vendor/isotree/inst/COPYRIGHTS +62 -0
- data/vendor/isotree/src/RcppExports.cpp +525 -52
- data/vendor/isotree/src/Rwrapper.cpp +1931 -268
- data/vendor/isotree/src/c_interface.cpp +953 -0
- data/vendor/isotree/src/crit.hpp +4232 -0
- data/vendor/isotree/src/dist.hpp +1886 -0
- data/vendor/isotree/src/exp_depth_table.hpp +134 -0
- data/vendor/isotree/src/extended.hpp +1444 -0
- data/vendor/isotree/src/external_facing_generic.hpp +399 -0
- data/vendor/isotree/src/fit_model.hpp +2401 -0
- data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
- data/vendor/isotree/src/helpers_iforest.hpp +813 -0
- data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
- data/vendor/isotree/src/indexer.cpp +515 -0
- data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
- data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
- data/vendor/isotree/src/isoforest.hpp +1659 -0
- data/vendor/isotree/src/isotree.hpp +1804 -392
- data/vendor/isotree/src/isotree_exportable.hpp +99 -0
- data/vendor/isotree/src/merge_models.cpp +159 -16
- data/vendor/isotree/src/mult.hpp +1321 -0
- data/vendor/isotree/src/oop_interface.cpp +842 -0
- data/vendor/isotree/src/oop_interface.hpp +278 -0
- data/vendor/isotree/src/other_helpers.hpp +219 -0
- data/vendor/isotree/src/predict.hpp +1932 -0
- data/vendor/isotree/src/python_helpers.hpp +134 -0
- data/vendor/isotree/src/ref_indexer.hpp +154 -0
- data/vendor/isotree/src/robinmap/LICENSE +21 -0
- data/vendor/isotree/src/robinmap/README.md +483 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
- data/vendor/isotree/src/serialize.cpp +4300 -139
- data/vendor/isotree/src/sql.cpp +141 -59
- data/vendor/isotree/src/subset_models.cpp +174 -0
- data/vendor/isotree/src/utils.hpp +3808 -0
- data/vendor/isotree/src/xoshiro.hpp +467 -0
- data/vendor/isotree/src/ziggurat.hpp +405 -0
- metadata +38 -104
- data/vendor/cereal/LICENSE +0 -24
- data/vendor/cereal/README.md +0 -85
- data/vendor/cereal/include/cereal/access.hpp +0 -351
- data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
- data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
- data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
- data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
- data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
- data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
- data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
- data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
- data/vendor/cereal/include/cereal/details/util.hpp +0 -84
- data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
- data/vendor/cereal/include/cereal/macros.hpp +0 -154
- data/vendor/cereal/include/cereal/specialize.hpp +0 -139
- data/vendor/cereal/include/cereal/types/array.hpp +0 -79
- data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
- data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
- data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
- data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
- data/vendor/cereal/include/cereal/types/common.hpp +0 -129
- data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
- data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
- data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
- data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
- data/vendor/cereal/include/cereal/types/list.hpp +0 -62
- data/vendor/cereal/include/cereal/types/map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
- data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
- data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
- data/vendor/cereal/include/cereal/types/set.hpp +0 -103
- data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
- data/vendor/cereal/include/cereal/types/string.hpp +0 -61
- data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
- data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
- data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
- data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
- data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
- data/vendor/cereal/include/cereal/version.hpp +0 -52
- data/vendor/isotree/src/Makevars +0 -4
- data/vendor/isotree/src/crit.cpp +0 -912
- data/vendor/isotree/src/dist.cpp +0 -749
- data/vendor/isotree/src/extended.cpp +0 -790
- data/vendor/isotree/src/fit_model.cpp +0 -1090
- data/vendor/isotree/src/helpers_iforest.cpp +0 -324
- data/vendor/isotree/src/isoforest.cpp +0 -771
- data/vendor/isotree/src/mult.cpp +0 -607
- data/vendor/isotree/src/predict.cpp +0 -853
- data/vendor/isotree/src/utils.cpp +0 -1566
|
@@ -0,0 +1,953 @@
|
|
|
1
|
+
/* Isolation forests and variations thereof, with adjustments for incorporation
|
|
2
|
+
* of categorical variables and missing values.
|
|
3
|
+
* Written for C++11 standard and aimed at being used in R and Python.
|
|
4
|
+
*
|
|
5
|
+
* This library is based on the following works:
|
|
6
|
+
* [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
|
7
|
+
* "Isolation forest."
|
|
8
|
+
* 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008.
|
|
9
|
+
* [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
|
10
|
+
* "Isolation-based anomaly detection."
|
|
11
|
+
* ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3.
|
|
12
|
+
* [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner.
|
|
13
|
+
* "Extended Isolation Forest."
|
|
14
|
+
* arXiv preprint arXiv:1811.02141 (2018).
|
|
15
|
+
* [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
|
|
16
|
+
* "On detecting clustered anomalies using SCiForest."
|
|
17
|
+
* Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010.
|
|
18
|
+
* [5] https://sourceforge.net/projects/iforest/
|
|
19
|
+
* [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
|
|
20
|
+
* [7] Quinlan, J. Ross. C4.5: programs for machine learning. Elsevier, 2014.
|
|
21
|
+
* [8] Cortes, David.
|
|
22
|
+
* "Distance approximation using Isolation Forests."
|
|
23
|
+
* arXiv preprint arXiv:1910.12362 (2019).
|
|
24
|
+
* [9] Cortes, David.
|
|
25
|
+
* "Imputing missing values with unsupervised random trees."
|
|
26
|
+
* arXiv preprint arXiv:1911.06646 (2019).
|
|
27
|
+
* [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom
|
|
28
|
+
* [11] Cortes, David.
|
|
29
|
+
* "Revisiting randomized choices in isolation forests."
|
|
30
|
+
* arXiv preprint arXiv:2110.13402 (2021).
|
|
31
|
+
* [12] Guha, Sudipto, et al.
|
|
32
|
+
* "Robust random cut forest based anomaly detection on streams."
|
|
33
|
+
* International conference on machine learning. PMLR, 2016.
|
|
34
|
+
* [13] Cortes, David.
|
|
35
|
+
* "Isolation forests: looking beyond tree depth."
|
|
36
|
+
* arXiv preprint arXiv:2111.11639 (2021).
|
|
37
|
+
* [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou.
|
|
38
|
+
* "Isolation kernel and its effect on SVM"
|
|
39
|
+
* Proceedings of the 24th ACM SIGKDD
|
|
40
|
+
* International Conference on Knowledge Discovery & Data Mining. 2018.
|
|
41
|
+
*
|
|
42
|
+
* BSD 2-Clause License
|
|
43
|
+
* Copyright (c) 2019-2022, David Cortes
|
|
44
|
+
* All rights reserved.
|
|
45
|
+
* Redistribution and use in source and binary forms, with or without
|
|
46
|
+
* modification, are permitted provided that the following conditions are met:
|
|
47
|
+
* * Redistributions of source code must retain the above copyright notice, this
|
|
48
|
+
* list of conditions and the following disclaimer.
|
|
49
|
+
* * Redistributions in binary form must reproduce the above copyright notice,
|
|
50
|
+
* this list of conditions and the following disclaimer in the documentation
|
|
51
|
+
* and/or other materials provided with the distribution.
|
|
52
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
53
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
54
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
55
|
+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
56
|
+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
57
|
+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
58
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
59
|
+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
60
|
+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
61
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
62
|
+
*/
|
|
63
|
+
#if !defined(_FOR_R) && !defined(_FOR_PYTHON)
|
|
64
|
+
|
|
65
|
+
#include "oop_interface.hpp"
|
|
66
|
+
|
|
67
|
+
using std::cerr;
|
|
68
|
+
using isotree::IsolationForest;
|
|
69
|
+
|
|
70
|
+
/* Status codes returned by the 'int'-returning functions of this C interface. */
enum IsoTreeExitCodes {
    IsoTreeSuccess = 0,
    IsoTreeError = 1
};
|
|
71
|
+
|
|
72
|
+
struct IsoTree_Params {
|
|
73
|
+
int nthreads = -1; /* <- May be manually changed at any time */
|
|
74
|
+
|
|
75
|
+
uint64_t random_seed = 1;
|
|
76
|
+
|
|
77
|
+
/* General tree construction parameters */
|
|
78
|
+
size_t ndim = 3;
|
|
79
|
+
size_t ntry = 1;
|
|
80
|
+
CoefType coef_type = Uniform; /* only for ndim>1 */
|
|
81
|
+
bool with_replacement = false;
|
|
82
|
+
bool weight_as_sample = true;
|
|
83
|
+
size_t sample_size = 0;
|
|
84
|
+
size_t ntrees = 500;
|
|
85
|
+
size_t max_depth = 0;
|
|
86
|
+
size_t ncols_per_tree = 0;
|
|
87
|
+
bool limit_depth = true;
|
|
88
|
+
bool penalize_range = false;
|
|
89
|
+
bool standardize_data = true;
|
|
90
|
+
ScoringMetric scoring_metric = Depth;
|
|
91
|
+
bool fast_bratio = true;
|
|
92
|
+
bool weigh_by_kurt = false;
|
|
93
|
+
double prob_pick_by_gain_pl = 0.;
|
|
94
|
+
double prob_pick_by_gain_avg = 0.;
|
|
95
|
+
double prob_pick_by_full_gain = 0.;
|
|
96
|
+
double prob_pick_by_dens = 0.;
|
|
97
|
+
double prob_pick_col_by_range = 0.;
|
|
98
|
+
double prob_pick_col_by_var = 0.;
|
|
99
|
+
double prob_pick_col_by_kurt = 0.;
|
|
100
|
+
double min_gain = 0.;
|
|
101
|
+
MissingAction missing_action = Impute;
|
|
102
|
+
|
|
103
|
+
/* For categorical variables */
|
|
104
|
+
CategSplit cat_split_type = SubSet;
|
|
105
|
+
NewCategAction new_cat_action = Weighted;
|
|
106
|
+
bool coef_by_prop = false;
|
|
107
|
+
bool all_perm = false;
|
|
108
|
+
|
|
109
|
+
/* For imputation methods (when using 'build_imputer=true' and calling 'impute') */
|
|
110
|
+
bool build_imputer = false;
|
|
111
|
+
size_t min_imp_obs = 3;
|
|
112
|
+
UseDepthImp depth_imp = Higher;
|
|
113
|
+
WeighImpRows weigh_imp_rows = Inverse;
|
|
114
|
+
};
|
|
115
|
+
|
|
116
|
+
extern "C" {
|
|
117
|
+
|
|
118
|
+
/* Allocates a parameters object holding the default values declared in
 * 'IsoTree_Params' and returns it as an opaque pointer.
 *
 * Returns NULL if the allocation throws: the exception is caught here so that
 * it does not propagate out of this 'extern "C"' function, and its message is
 * written to stderr in the same way as the other functions in this file do.
 * The returned object is meant to be released with 'delete_isotree_parameters'. */
ISOTREE_EXPORTED
void* allocate_default_isotree_parameters()
{
    try
    {
        return new IsoTree_Params();
    }

    catch (const std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
        return nullptr;
    }
}
|
|
123
|
+
|
|
124
|
+
/* Destroys a parameters object received as an opaque pointer.
 * The pointer is interpreted as 'IsoTree_Params*' and deleted; a NULL
 * pointer results in a no-op 'delete'. */
ISOTREE_EXPORTED
void delete_isotree_parameters(void *isotree_parameters)
{
    delete static_cast<IsoTree_Params*>(isotree_parameters);
}
|
|
130
|
+
|
|
131
|
+
/* Overwrites selected fields of a parameters object.
 *
 * 'isotree_parameters' is the opaque pointer to an 'IsoTree_Params' object.
 * If it is NULL, a message is written to stderr and nothing else happens.
 *
 * Every other argument is optional: a NULL pointer leaves the corresponding
 * field untouched, while a non-NULL pointer has the value it points to copied
 * into the field of the same name. Flags arrive as 'uint8_t' and are stored as
 * 'bool'; enum-typed fields arrive as 'uint8_t' codes and are converted to the
 * field's enum type without any range validation. */
ISOTREE_EXPORTED
void set_isotree_parameters
(
    void *isotree_parameters,
    int* nthreads,
    uint64_t* random_seed,
    size_t* ndim,
    size_t* ntry,
    uint8_t* coef_type,
    uint8_t* with_replacement,
    uint8_t* weight_as_sample,
    size_t* sample_size,
    size_t* ntrees,
    size_t* max_depth,
    size_t* ncols_per_tree,
    uint8_t* limit_depth,
    uint8_t* penalize_range,
    uint8_t* standardize_data,
    uint8_t* scoring_metric,
    uint8_t* fast_bratio,
    uint8_t* weigh_by_kurt,
    double* prob_pick_by_gain_pl,
    double* prob_pick_by_gain_avg,
    double* prob_pick_by_full_gain,
    double* prob_pick_by_dens,
    double* prob_pick_col_by_range,
    double* prob_pick_col_by_var,
    double* prob_pick_col_by_kurt,
    double* min_gain,
    uint8_t* missing_action,
    uint8_t* cat_split_type,
    uint8_t* new_cat_action,
    uint8_t* coef_by_prop,
    uint8_t* all_perm,
    uint8_t* build_imputer,
    size_t* min_imp_obs,
    uint8_t* depth_imp,
    uint8_t* weigh_imp_rows
)
{
    if (isotree_parameters == nullptr) {
        cerr << "Passed NULL pointer to 'set_isotree_parameters'." << std::endl;
        return;
    }

    IsoTree_Params &dst = *static_cast<IsoTree_Params*>(isotree_parameters);

    /* Threading and seeding */
    if (nthreads != nullptr)
        dst.nthreads = *nthreads;
    if (random_seed != nullptr)
        dst.random_seed = *random_seed;

    /* General tree construction parameters */
    if (ndim != nullptr)
        dst.ndim = *ndim;
    if (ntry != nullptr)
        dst.ntry = *ntry;
    if (coef_type != nullptr)
        dst.coef_type = static_cast<CoefType>(*coef_type);
    if (with_replacement != nullptr)
        dst.with_replacement = static_cast<bool>(*with_replacement);
    if (weight_as_sample != nullptr)
        dst.weight_as_sample = static_cast<bool>(*weight_as_sample);
    if (sample_size != nullptr)
        dst.sample_size = *sample_size;
    if (ntrees != nullptr)
        dst.ntrees = *ntrees;
    if (max_depth != nullptr)
        dst.max_depth = *max_depth;
    if (ncols_per_tree != nullptr)
        dst.ncols_per_tree = *ncols_per_tree;
    if (limit_depth != nullptr)
        dst.limit_depth = static_cast<bool>(*limit_depth);
    if (penalize_range != nullptr)
        dst.penalize_range = static_cast<bool>(*penalize_range);
    if (standardize_data != nullptr)
        dst.standardize_data = static_cast<bool>(*standardize_data);
    if (scoring_metric != nullptr)
        dst.scoring_metric = static_cast<ScoringMetric>(*scoring_metric);
    if (fast_bratio != nullptr)
        dst.fast_bratio = static_cast<bool>(*fast_bratio);
    if (weigh_by_kurt != nullptr)
        dst.weigh_by_kurt = static_cast<bool>(*weigh_by_kurt);

    /* Split and column selection probabilities */
    if (prob_pick_by_gain_pl != nullptr)
        dst.prob_pick_by_gain_pl = *prob_pick_by_gain_pl;
    if (prob_pick_by_gain_avg != nullptr)
        dst.prob_pick_by_gain_avg = *prob_pick_by_gain_avg;
    if (prob_pick_by_full_gain != nullptr)
        dst.prob_pick_by_full_gain = *prob_pick_by_full_gain;
    if (prob_pick_by_dens != nullptr)
        dst.prob_pick_by_dens = *prob_pick_by_dens;
    if (prob_pick_col_by_range != nullptr)
        dst.prob_pick_col_by_range = *prob_pick_col_by_range;
    if (prob_pick_col_by_var != nullptr)
        dst.prob_pick_col_by_var = *prob_pick_col_by_var;
    if (prob_pick_col_by_kurt != nullptr)
        dst.prob_pick_col_by_kurt = *prob_pick_col_by_kurt;
    if (min_gain != nullptr)
        dst.min_gain = *min_gain;
    if (missing_action != nullptr)
        dst.missing_action = static_cast<MissingAction>(*missing_action);

    /* For categorical variables */
    if (cat_split_type != nullptr)
        dst.cat_split_type = static_cast<CategSplit>(*cat_split_type);
    if (new_cat_action != nullptr)
        dst.new_cat_action = static_cast<NewCategAction>(*new_cat_action);
    if (coef_by_prop != nullptr)
        dst.coef_by_prop = static_cast<bool>(*coef_by_prop);
    if (all_perm != nullptr)
        dst.all_perm = static_cast<bool>(*all_perm);

    /* For imputation methods */
    if (build_imputer != nullptr)
        dst.build_imputer = static_cast<bool>(*build_imputer);
    if (min_imp_obs != nullptr)
        dst.min_imp_obs = *min_imp_obs;
    if (depth_imp != nullptr)
        dst.depth_imp = static_cast<UseDepthImp>(*depth_imp);
    if (weigh_imp_rows != nullptr)
        dst.weigh_imp_rows = static_cast<WeighImpRows>(*weigh_imp_rows);
}
|
|
211
|
+
|
|
212
|
+
/* Reads selected fields out of a parameters object.
 *
 * 'isotree_parameters' is the opaque pointer to an 'IsoTree_Params' object.
 * If it is NULL, a message is written to stderr and no output is written.
 *
 * Every other argument is an optional output: a NULL pointer is skipped,
 * while a non-NULL pointer receives the current value of the field of the
 * same name. Flags and enum-typed fields are written out as 'uint8_t'. */
ISOTREE_EXPORTED
void get_isotree_parameters
(
    const void *isotree_parameters,
    int* nthreads,
    uint64_t* random_seed,
    size_t* ndim,
    size_t* ntry,
    uint8_t* coef_type,
    uint8_t* with_replacement,
    uint8_t* weight_as_sample,
    size_t* sample_size,
    size_t* ntrees,
    size_t* max_depth,
    size_t* ncols_per_tree,
    uint8_t* limit_depth,
    uint8_t* penalize_range,
    uint8_t* standardize_data,
    uint8_t* scoring_metric,
    uint8_t* fast_bratio,
    uint8_t* weigh_by_kurt,
    double* prob_pick_by_gain_pl,
    double* prob_pick_by_gain_avg,
    double* prob_pick_by_full_gain,
    double* prob_pick_by_dens,
    double* prob_pick_col_by_range,
    double* prob_pick_col_by_var,
    double* prob_pick_col_by_kurt,
    double* min_gain,
    uint8_t* missing_action,
    uint8_t* cat_split_type,
    uint8_t* new_cat_action,
    uint8_t* coef_by_prop,
    uint8_t* all_perm,
    uint8_t* build_imputer,
    size_t* min_imp_obs,
    uint8_t* depth_imp,
    uint8_t* weigh_imp_rows
)
{
    if (isotree_parameters == nullptr) {
        cerr << "Passed NULL pointer to 'get_isotree_parameters'." << std::endl;
        return;
    }

    const IsoTree_Params &src = *static_cast<const IsoTree_Params*>(isotree_parameters);

    /* Threading and seeding */
    if (nthreads != nullptr)
        *nthreads = src.nthreads;
    if (random_seed != nullptr)
        *random_seed = src.random_seed;

    /* General tree construction parameters */
    if (ndim != nullptr)
        *ndim = src.ndim;
    if (ntry != nullptr)
        *ntry = src.ntry;
    if (coef_type != nullptr)
        *coef_type = static_cast<uint8_t>(src.coef_type);
    if (with_replacement != nullptr)
        *with_replacement = static_cast<uint8_t>(src.with_replacement);
    if (weight_as_sample != nullptr)
        *weight_as_sample = static_cast<uint8_t>(src.weight_as_sample);
    if (sample_size != nullptr)
        *sample_size = src.sample_size;
    if (ntrees != nullptr)
        *ntrees = src.ntrees;
    if (max_depth != nullptr)
        *max_depth = src.max_depth;
    if (ncols_per_tree != nullptr)
        *ncols_per_tree = src.ncols_per_tree;
    if (limit_depth != nullptr)
        *limit_depth = static_cast<uint8_t>(src.limit_depth);
    if (penalize_range != nullptr)
        *penalize_range = static_cast<uint8_t>(src.penalize_range);
    if (standardize_data != nullptr)
        *standardize_data = static_cast<uint8_t>(src.standardize_data);
    if (scoring_metric != nullptr)
        *scoring_metric = static_cast<uint8_t>(src.scoring_metric);
    if (fast_bratio != nullptr)
        *fast_bratio = static_cast<uint8_t>(src.fast_bratio);
    if (weigh_by_kurt != nullptr)
        *weigh_by_kurt = static_cast<uint8_t>(src.weigh_by_kurt);

    /* Split and column selection probabilities */
    if (prob_pick_by_gain_pl != nullptr)
        *prob_pick_by_gain_pl = src.prob_pick_by_gain_pl;
    if (prob_pick_by_gain_avg != nullptr)
        *prob_pick_by_gain_avg = src.prob_pick_by_gain_avg;
    if (prob_pick_by_full_gain != nullptr)
        *prob_pick_by_full_gain = src.prob_pick_by_full_gain;
    if (prob_pick_by_dens != nullptr)
        *prob_pick_by_dens = src.prob_pick_by_dens;
    if (prob_pick_col_by_range != nullptr)
        *prob_pick_col_by_range = src.prob_pick_col_by_range;
    if (prob_pick_col_by_var != nullptr)
        *prob_pick_col_by_var = src.prob_pick_col_by_var;
    if (prob_pick_col_by_kurt != nullptr)
        *prob_pick_col_by_kurt = src.prob_pick_col_by_kurt;
    if (min_gain != nullptr)
        *min_gain = src.min_gain;
    if (missing_action != nullptr)
        *missing_action = static_cast<uint8_t>(src.missing_action);

    /* For categorical variables */
    if (cat_split_type != nullptr)
        *cat_split_type = static_cast<uint8_t>(src.cat_split_type);
    if (new_cat_action != nullptr)
        *new_cat_action = static_cast<uint8_t>(src.new_cat_action);
    if (coef_by_prop != nullptr)
        *coef_by_prop = static_cast<uint8_t>(src.coef_by_prop);
    if (all_perm != nullptr)
        *all_perm = static_cast<uint8_t>(src.all_perm);

    /* For imputation methods */
    if (build_imputer != nullptr)
        *build_imputer = static_cast<uint8_t>(src.build_imputer);
    if (min_imp_obs != nullptr)
        *min_imp_obs = src.min_imp_obs;
    if (depth_imp != nullptr)
        *depth_imp = static_cast<uint8_t>(src.depth_imp);
    if (weigh_imp_rows != nullptr)
        *weigh_imp_rows = static_cast<uint8_t>(src.weigh_imp_rows);
}
|
|
292
|
+
|
|
293
|
+
|
|
294
|
+
/* Builds an 'IsolationForest' from the given parameters, fits it to the data,
 * and returns it as an opaque pointer (or NULL on failure).
 *
 * Failure cases, each reported on stderr:
 *   - both 'ncols_numeric' and 'ncols_categ' are zero;
 *   - 'categ_data' is passed without 'ncateg';
 *   - 'isotree_parameters' is NULL;
 *   - none of 'numeric_data', 'categ_data', 'csc_indptr' is passed;
 *   - a 'std::exception' is thrown while constructing or fitting the model.
 *
 * Input dispatch:
 *   - 'csc_indptr' passed            -> sparse 'fit' overload (csc_* arrays);
 *   - otherwise 'categ_data' passed  -> dense 'fit' overload with categoricals;
 *   - otherwise 'numeric_data' passed -> dense numeric-only fit.
 *
 * 'row_weights' and 'column_weights' are forwarded in every case. For dense
 * numeric-only input they are forwarded through the general dense overload
 * with no categorical columns, since the 3-argument 'fit' overload has no
 * parameters for them and would otherwise drop them silently.
 *
 * On success, the returned object is meant to be released with
 * 'delete_isotree_model'. */
ISOTREE_EXPORTED
void* isotree_fit
(
    const void *isotree_parameters,
    size_t nrows,
    double *numeric_data,
    size_t ncols_numeric,
    int *categ_data,
    size_t ncols_categ,
    int *ncateg,
    double *csc_values,
    int *csc_indices,
    int *csc_indptr,
    double *row_weights,
    double *column_weights
)
{
    if (!ncols_numeric && !ncols_categ) {
        cerr << "Data has no columns" << std::endl;
        return nullptr;
    }
    if (categ_data && !ncateg) {
        cerr << "Must pass 'ncateg' if there is categorical data" << std::endl;
        return nullptr;
    }
    if (!isotree_parameters) {
        cerr << "Passed NULL 'isotree_parameters' to 'isotree_fit'." << std::endl;
        return nullptr;
    }

    const IsoTree_Params *params = static_cast<const IsoTree_Params*>(isotree_parameters);
    try
    {
        /* Owned by a smart pointer until fitting succeeds, so that the model
           is freed if anything below throws. */
        std::unique_ptr<IsolationForest> iso(
            new IsolationForest(
                params->ndim, params->ntry, params->coef_type, params->coef_by_prop,
                params->with_replacement, params->weight_as_sample,
                params->sample_size, params->ntrees,
                params->max_depth, params->ncols_per_tree,
                params->limit_depth, params->penalize_range,
                params->standardize_data, params->scoring_metric, params->fast_bratio, params->weigh_by_kurt,
                params->prob_pick_by_gain_pl, params->prob_pick_by_gain_avg,
                params->prob_pick_by_full_gain, params->prob_pick_by_dens,
                params->prob_pick_col_by_range, params->prob_pick_col_by_var,
                params->prob_pick_col_by_kurt,
                params->min_gain, params->missing_action,
                params->cat_split_type, params->new_cat_action,
                params->all_perm, params->build_imputer, params->min_imp_obs,
                params->depth_imp, params->weigh_imp_rows,
                params->random_seed, params->nthreads
            )
        );

        if (csc_indptr) {
            iso->fit(csc_values, csc_indices, csc_indptr,
                     ncols_numeric, nrows,
                     categ_data, ncols_categ, ncateg,
                     row_weights, column_weights);
        }

        else if (categ_data) {
            iso->fit(numeric_data, ncols_numeric, nrows,
                     categ_data, ncols_categ, ncateg,
                     row_weights, column_weights);
        }

        else if (numeric_data) {
            if (row_weights || column_weights) {
                /* Weights were supplied: use the overload that accepts them,
                   declaring that there are no categorical columns. */
                int *no_categ_data = nullptr;
                int *no_ncateg = nullptr;
                const size_t no_categ_cols = 0;
                iso->fit(numeric_data, ncols_numeric, nrows,
                         no_categ_data, no_categ_cols, no_ncateg,
                         row_weights, column_weights);
            }

            else {
                iso->fit(numeric_data, nrows, ncols_numeric);
            }
        }

        else {
            throw std::runtime_error("Invalid input data.\n");
        }

        return iso.release();
    }

    catch (std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
        return nullptr;
    }
}
|
|
378
|
+
|
|
379
|
+
/* Destroys a model object received as an opaque pointer.
 * The pointer is interpreted as 'IsolationForest*' and deleted; a NULL
 * pointer results in a no-op 'delete'. */
ISOTREE_EXPORTED
void delete_isotree_model(void *isotree_model)
{
    delete static_cast<IsolationForest*>(isotree_model);
}
|
|
385
|
+
|
|
386
|
+
/* Calls 'predict' on the model for the given rows, writing into the
 * caller-provided output arrays.
 *
 * Returns 'IsoTreeSuccess' when the call completes, and 'IsoTreeError' when
 * 'isotree_model' or 'output_scores' is NULL or when a 'std::exception' is
 * thrown during prediction (the message is written to stderr).
 *
 * When 'sparse_indptr' is NULL the dense overload is used, taking
 * 'numeric_data', 'ld_numeric' and 'ld_categ'; otherwise the sparse overload
 * is used, taking the 'sparse_*' arrays together with 'is_csc'.
 * The 'uint8_t' flags are converted to 'bool' before being forwarded. */
ISOTREE_EXPORTED
int isotree_predict
(
    void *isotree_model,
    double *output_scores,
    int *output_tree_num,
    double *per_tree_depths,
    uint8_t standardize_scores,
    size_t nrows,
    uint8_t is_col_major,
    double *numeric_data,
    size_t ld_numeric,
    int *categ_data,
    size_t ld_categ,
    uint8_t is_csc,
    double *sparse_values,
    int *sparse_indices,
    int *sparse_indptr
)
{
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_predict'." << std::endl;
        return IsoTreeError;
    }
    if (output_scores == nullptr) {
        cerr << "Passed NULL 'output_scores' to 'isotree_predict'." << std::endl;
        return IsoTreeError;
    }

    IsolationForest *forest = static_cast<IsolationForest*>(isotree_model);
    const bool col_major = static_cast<bool>(is_col_major);
    const bool standardized = static_cast<bool>(standardize_scores);

    try
    {
        if (sparse_indptr != nullptr) {
            forest->predict(sparse_values, sparse_indices, sparse_indptr, static_cast<bool>(is_csc),
                            categ_data, col_major, ld_categ, nrows, standardized,
                            output_scores, output_tree_num, per_tree_depths);
        }

        else {
            forest->predict(numeric_data, categ_data, col_major,
                            nrows, ld_numeric, ld_categ, standardized,
                            output_scores, output_tree_num, per_tree_depths);
        }
    }

    catch (std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
        return IsoTreeError;
    }

    return IsoTreeSuccess;
}
|
|
441
|
+
|
|
442
|
+
/* Calls 'predict_distance' on the model for the given rows, writing the
 * result into 'output_dist'.
 *
 * Returns 'IsoTreeSuccess' when the call completes, and 'IsoTreeError' when
 * 'isotree_model' or 'output_dist' is NULL or when a 'std::exception' is
 * thrown during the calculation (the message is written to stderr).
 *
 * When 'csc_indptr' is NULL the dense overload is used with 'numeric_data';
 * otherwise the overload taking the 'csc_*' arrays is used. */
ISOTREE_EXPORTED
int isotree_predict_distance
(
    void *isotree_model,
    uint8_t output_triangular,
    uint8_t as_kernel,
    uint8_t standardize,
    uint8_t assume_full_distr,
    double *output_dist,
    size_t nrows,
    double *numeric_data,
    int *categ_data,
    double *csc_values,
    int *csc_indices,
    int *csc_indptr
)
{
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_predict_distance'." << std::endl;
        return IsoTreeError;
    }
    if (output_dist == nullptr) {
        cerr << "Passed NULL 'output_dist' to 'isotree_predict_distance'." << std::endl;
        return IsoTreeError;
    }

    IsolationForest *forest = static_cast<IsolationForest*>(isotree_model);
    const bool full_distr = static_cast<bool>(assume_full_distr);
    const bool standardized = static_cast<bool>(standardize);
    const bool triangular = static_cast<bool>(output_triangular);

    try
    {
        /* 'as_kernel' is forwarded as received, without an explicit conversion. */
        if (csc_indptr != nullptr) {
            forest->predict_distance(csc_values, csc_indices, csc_indptr, categ_data,
                                     nrows, as_kernel, full_distr, standardized,
                                     triangular,
                                     output_dist);
        }

        else {
            forest->predict_distance(numeric_data, categ_data,
                                     nrows, as_kernel,
                                     full_distr, standardized,
                                     triangular,
                                     output_dist);
        }
    }

    catch (std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
        return IsoTreeError;
    }

    return IsoTreeSuccess;
}
|
|
497
|
+
|
|
498
|
+
/* Calls 'impute' on the model for the given rows.
 *
 * Returns 'IsoTreeSuccess' when the call completes, and 'IsoTreeError' when
 * 'isotree_model' is NULL or when a 'std::exception' is thrown during
 * imputation (the message is written to stderr).
 *
 * When 'csr_indptr' is NULL the dense overload is used with 'numeric_data';
 * otherwise the overload taking the 'csr_*' arrays is used. The data arrays
 * are passed as non-const pointers.
 * NOTE(review): presumably imputed values are written back in-place into the
 * passed arrays - confirm against the 'IsolationForest::impute' declaration. */
ISOTREE_EXPORTED
int isotree_impute
(
    void *isotree_model,
    size_t nrows,
    uint8_t is_col_major,
    double *numeric_data,
    int *categ_data,
    double *csr_values,
    int *csr_indices,
    int *csr_indptr
)
{
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_impute'." << std::endl;
        return IsoTreeError;
    }

    IsolationForest *forest = static_cast<IsolationForest*>(isotree_model);
    const bool col_major = static_cast<bool>(is_col_major);

    try
    {
        if (csr_indptr != nullptr) {
            forest->impute(csr_values, csr_indices, csr_indptr,
                           categ_data, col_major, nrows);
        }

        else {
            forest->impute(numeric_data, categ_data, col_major, nrows);
        }
    }

    catch (std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
        return IsoTreeError;
    }

    return IsoTreeSuccess;
}
|
|
540
|
+
|
|
541
|
+
ISOTREE_EXPORTED
int isotree_set_reference_points
(
    void* isotree_model,
    uint8_t with_distances,
    size_t nrows,
    uint8_t is_col_major,
    double *numeric_data,
    size_t ld_numeric,
    int *categ_data,
    size_t ld_categ,
    uint8_t is_csc,
    double *csc_values,
    int *csc_indices,
    int *csc_indptr
)
{
    /*  C entry point forwarding to 'IsolationForest::set_as_reference_points'.
        A NULL 'csc_indptr' selects the dense overload (which also receives the
        layout flag and leading dimensions); otherwise the csc_* overload is used.
        Note: 'is_csc' is accepted for interface compatibility but is not read
        here - the choice of overload depends only on 'csc_indptr'.
        Returns 'IsoTreeSuccess' if the call completes, 'IsoTreeError' on a
        NULL model or when a std::exception is thrown (message goes to stderr). */
    if (!isotree_model) {
        /* The message now names this function (it previously named
           'isotree_serialize_to_file', which misdirected debugging). */
        cerr << "Passed NULL 'isotree_model' to 'isotree_set_reference_points'." << std::endl;
        return IsoTreeError;
    }

    IsolationForest *model = (IsolationForest*)isotree_model;
    try
    {
        if (csc_indptr == NULL)
            model->set_as_reference_points(numeric_data, categ_data, (bool)is_col_major,
                                           nrows, ld_numeric, ld_categ,
                                           (bool)with_distances);
        else
            model->set_as_reference_points(csc_values, csc_indices, csc_indptr, categ_data,
                                           nrows, (bool)with_distances);
        return IsoTreeSuccess;
    }

    catch (std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
    }

    return IsoTreeError;
}
|
|
584
|
+
|
|
585
|
+
ISOTREE_EXPORTED
size_t isotree_get_num_reference_points(void* isotree_model)
{
    /*  Returns 'IsolationForest::get_num_reference_points()' for the given
        model handle. A NULL handle is reported on stderr and yields 0. */
    if (!isotree_model) {
        /* The message now uses this function's actual exported name
           (it previously said 'isotree_get_n_reference_points'). */
        cerr << "Passed NULL 'isotree_model' to 'isotree_get_num_reference_points'." << std::endl;
        return 0;
    }

    IsolationForest *model = (IsolationForest*)isotree_model;
    return model->get_num_reference_points();
}
|
|
596
|
+
|
|
597
|
+
ISOTREE_EXPORTED
int isotree_predict_distance_to_ref_points
(
    void* isotree_model,
    double *output_dist, /* <- output goes here */
    uint8_t as_kernel,
    uint8_t standardize,
    size_t nrows,
    uint8_t is_col_major,
    double *numeric_data,
    size_t ld_numeric,
    int *categ_data,
    size_t ld_categ,
    double *csc_values,
    int *csc_indices,
    int *csc_indptr
)
{
    /*  C entry point forwarding to
        'IsolationForest::predict_distance_to_ref_points'. All data pointers
        (dense and csc_*) are handed over as-is; only the model handle and
        the output buffer are validated here.
        Returns 'IsoTreeSuccess' if the call completes, 'IsoTreeError' on a
        NULL model/output or when a std::exception is thrown. */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_predict_distance_to_ref_points'." << std::endl;
        return IsoTreeError;
    }
    if (output_dist == nullptr) {
        cerr << "Passed NULL 'output_dist' to 'isotree_predict_distance_to_ref_points'." << std::endl;
        return IsoTreeError;
    }

    IsolationForest *forest = static_cast<IsolationForest*>(isotree_model);
    try
    {
        forest->predict_distance_to_ref_points(
            numeric_data, categ_data,
            csc_values, csc_indices, csc_indptr,
            nrows, is_col_major, ld_numeric, ld_categ,
            as_kernel, standardize,
            output_dist
        );
        return IsoTreeSuccess;
    }
    catch (const std::exception &err)
    {
        cerr << err.what();
        cerr.flush();
    }

    return IsoTreeError;
}
|
|
643
|
+
|
|
644
|
+
ISOTREE_EXPORTED
int isotree_serialize_to_file(const void *isotree_model, FILE *output)
{
    /*  Writes the model behind 'isotree_model' to 'output' through
        'IsolationForest::serialize'. Both arguments must be non-NULL.
        Returns 'IsoTreeSuccess' if the call completes, 'IsoTreeError' on
        invalid arguments or when a std::exception is thrown. */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_serialize_to_file'." << std::endl;
        return IsoTreeError;
    }
    if (output == nullptr) {
        cerr << "Passed invalid file handle to 'isotree_serialize_to_file'." << std::endl;
        return IsoTreeError;
    }

    const IsolationForest *forest = static_cast<const IsolationForest*>(isotree_model);

    try
    {
        forest->serialize(output);
        return IsoTreeSuccess;
    }
    catch (const std::exception &err)
    {
        cerr << err.what();
        cerr.flush();
    }

    return IsoTreeError;
}
|
|
672
|
+
|
|
673
|
+
ISOTREE_EXPORTED
void* isotree_deserialize_from_file(FILE *serialized_model, int nthreads)
{
    /*  Builds a heap-allocated 'IsolationForest' from the result of
        'IsolationForest::deserialize(serialized_model, nthreads)' and hands
        ownership of it to the caller as an opaque pointer.
        Returns NULL on a NULL file handle or when a std::exception is thrown
        (message goes to stderr). */
    if (serialized_model == nullptr) {
        cerr << "Passed invalid file handle to 'isotree_deserialize_from_file'." << std::endl;
        return nullptr;
    }

    try
    {
        /* 'std::make_unique' only exists from C++14 onwards */
        #if __cplusplus >= 201402L
        std::unique_ptr<IsolationForest> loaded =
            std::make_unique<IsolationForest>(IsolationForest::deserialize(serialized_model, nthreads));
        #else
        std::unique_ptr<IsolationForest> loaded(
            new IsolationForest(
                std::forward<IsolationForest>(
                    IsolationForest::deserialize(serialized_model, nthreads)
                )
            )
        );
        #endif
        return static_cast<void*>(loaded.release());
    }
    catch (const std::exception &err)
    {
        cerr << err.what();
        cerr.flush();
        return nullptr;
    }
}
|
|
700
|
+
|
|
701
|
+
ISOTREE_EXPORTED
size_t isotree_serialize_get_raw_size(const void *isotree_model)
{
    /*  Returns the value of 'determine_serialized_size_combined' for the
        model's sub-objects. Each sub-object (single-variable model, extended
        model, imputer, indexer) is passed only when its container is
        non-empty; otherwise NULL is passed in its place. The metadata size
        argument is always zero.
        Returns 0 on a NULL model or when a std::exception is thrown. */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_serialize_get_raw_size'." << std::endl;
        return 0;
    }

    try
    {
        const IsolationForest *forest = static_cast<const IsolationForest*>(isotree_model);
        return determine_serialized_size_combined(
            forest->model.trees.empty()? nullptr : &forest->model,
            forest->model_ext.hplanes.empty()? nullptr : &forest->model_ext,
            forest->imputer.imputer_tree.empty()? nullptr : &forest->imputer,
            forest->indexer.indices.empty()? nullptr : &forest->indexer,
            (size_t)0
        );
    }
    catch (const std::exception &err)
    {
        cerr << err.what();
        cerr.flush();
        return 0;
    }
}
|
|
728
|
+
|
|
729
|
+
ISOTREE_EXPORTED
int isotree_serialize_to_raw(const void *isotree_model, char *output)
{
    /*  Serializes the model's sub-objects into the caller-provided buffer
        'output' through 'serialize_combined'. Each sub-object (single-variable
        model, extended model, imputer, indexer) is passed only when its
        container is non-empty; no optional metadata is written.
        NOTE(review): the buffer is presumably expected to hold at least
        'isotree_serialize_get_raw_size(isotree_model)' bytes - no size is
        received here, so this cannot be verified by this function.
        Returns 'IsoTreeSuccess' if the call completes, 'IsoTreeError' on a
        NULL model, a NULL output buffer, or when a std::exception is thrown. */
    if (!isotree_model) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_serialize_to_raw'." << std::endl;
        return IsoTreeError;
    }
    /* Reject a NULL destination up-front instead of handing it to the
       serializer, matching the check done in 'isotree_serialize_to_file'. */
    if (!output) {
        cerr << "Passed NULL 'output' to 'isotree_serialize_to_raw'." << std::endl;
        return IsoTreeError;
    }

    const IsolationForest *model = (const IsolationForest*)isotree_model;

    try
    {
        serialize_combined(
            (!model->model.trees.empty())? &model->model : nullptr,
            (!model->model_ext.hplanes.empty())? &model->model_ext : nullptr,
            (!model->imputer.imputer_tree.empty())? &model->imputer : nullptr,
            (!model->indexer.indices.empty())? &model->indexer : nullptr,
            (char*)nullptr,
            (size_t)0,
            output
        );
        return IsoTreeSuccess;
    }

    catch (std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
    }

    return IsoTreeError;
}
|
|
761
|
+
|
|
762
|
+
ISOTREE_EXPORTED
void* isotree_deserialize_from_raw(const char *serialized_model, int nthreads)
{
    /*  Rebuilds a heap-allocated 'IsolationForest' from bytes in
        'serialized_model' and hands ownership to the caller as an opaque
        pointer. The bytes are first inspected and then read through
        'deserialize_combined' (without optional metadata).
        Returns NULL - after writing a message to stderr - when the input is
        NULL, fails validation, contains no trees, has an imputer/indexer
        whose size differs from the number of trees, or when any other
        std::exception is thrown. */
    if (!serialized_model) {
        cerr << "Passed NULL 'serialized_model' to 'isotree_deserialize_from_raw'." << std::endl;
        return nullptr;
    }

    try
    {
        bool is_isotree_model = false;
        bool is_compatible = false;
        bool has_combined_objects = false;
        bool has_IsoForest = false;
        bool has_ExtIsoForest = false;
        bool has_Imputer = false;
        bool has_Indexer = false;
        bool has_metadata = false;
        size_t size_metadata = 0;
        inspect_serialized_object(
            serialized_model,
            is_isotree_model,
            is_compatible,
            has_combined_objects,
            has_IsoForest,
            has_ExtIsoForest,
            has_Imputer,
            has_Indexer,
            has_metadata,
            size_metadata
        );
        /* The input is only usable here when ALL three flags hold. The
           previous condition threw solely for a valid, compatible but
           non-combined object and let non-isotree or incompatible bytes
           through to 'deserialize_combined'. */
        if (!is_isotree_model || !is_compatible || !has_combined_objects)
            throw std::runtime_error("Serialized model is not compatible.\n");

        IsoForest model = IsoForest();
        ExtIsoForest model_ext = ExtIsoForest();
        Imputer imputer = Imputer();
        TreesIndexer indexer = TreesIndexer();

        deserialize_combined(
            serialized_model,
            &model,
            &model_ext,
            &imputer,
            &indexer,
            (char*)nullptr
        );

        if (model.trees.empty() && model_ext.hplanes.empty())
            throw std::runtime_error("Error: model contains no trees.\n");

        /* The single-variable model takes precedence when it has trees;
           otherwise the extended model is used and 'ndim' is set to 3. */
        const bool has_single_var_model = !model.trees.empty();
        const size_t ntrees = has_single_var_model? model.trees.size() : model_ext.hplanes.size();
        const size_t ndim = has_single_var_model? 1 : 3;
        bool build_imputer = false;

        if (!imputer.imputer_tree.empty()) {
            if (imputer.imputer_tree.size() != ntrees)
                throw std::runtime_error("Error: imputer has incorrect number of trees.\n");
            build_imputer = true;
        }
        if (!indexer.indices.empty()) {
            if (indexer.indices.size() != ntrees)
                throw std::runtime_error("Error: indexer has incorrect number of trees.\n");
        }

        std::unique_ptr<IsolationForest> out(new IsolationForest());
        out->nthreads = nthreads;
        out->ndim = ndim;
        out->ntrees = ntrees;
        out->build_imputer = build_imputer;

        /* Only the non-empty pieces are moved into the output object. */
        if (has_single_var_model)
            out->get_model() = std::move(model);
        else
            out->get_model_ext() = std::move(model_ext);
        if (!imputer.imputer_tree.empty())
            out->get_imputer() = std::move(imputer);
        if (!indexer.indices.empty())
            out->indexer = std::move(indexer);

        return out.release();
    }

    catch (std::exception &e)
    {
        cerr << e.what();
        cerr.flush();
        return nullptr;
    }
}
|
|
859
|
+
|
|
860
|
+
ISOTREE_EXPORTED
int isotree_set_num_threads(void *isotree_model, int nthreads)
{
    /*  Stores a thread count in the model's 'nthreads' field.
        Negative values are translated first: without OpenMP they become 1,
        with OpenMP they become 'omp_get_max_threads() + nthreads + 1'
        (so -1 maps to the maximum). Non-negative values are stored as-is.
        Returns 'IsoTreeError' on a NULL model, 'IsoTreeSuccess' otherwise. */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_set_num_threads'." << std::endl;
        return IsoTreeError;
    }

    int resolved_threads = nthreads;
    if (resolved_threads < 0) {
        #ifdef _OPENMP
        resolved_threads = omp_get_max_threads() + resolved_threads + 1;
        #else
        resolved_threads = 1;
        #endif
    }

    static_cast<IsolationForest*>(isotree_model)->nthreads = resolved_threads;
    return IsoTreeSuccess;
}
|
|
878
|
+
|
|
879
|
+
ISOTREE_EXPORTED
int isotree_get_num_threads(const void *isotree_model)
{
    /*  Returns the model's 'nthreads' field. A NULL handle is reported on
        stderr and signalled with the sentinel value '-INT_MAX'. */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_get_num_threads'." << std::endl;
        return -INT_MAX;
    }

    const IsolationForest *forest = static_cast<const IsolationForest*>(isotree_model);
    return forest->nthreads;
}
|
|
889
|
+
|
|
890
|
+
ISOTREE_EXPORTED
size_t isotree_get_ntrees(const void *isotree_model)
{
    /*  Returns 'IsolationForest::get_ntrees()' for the given handle.
        'SIZE_MAX' is the error value: it is returned for a NULL handle and
        when a std::exception is thrown (message goes to stderr). */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_get_ntrees'." << std::endl;
        return SIZE_MAX;
    }

    const IsolationForest *forest = static_cast<const IsolationForest*>(isotree_model);
    try
    {
        return forest->get_ntrees();
    }
    catch (const std::exception &err)
    {
        cerr << err.what();
        cerr.flush();
        return SIZE_MAX;
    }
}
|
|
907
|
+
|
|
908
|
+
ISOTREE_EXPORTED
uint8_t isotree_build_indexer(void *isotree_model, const uint8_t with_distances)
{
    /*  Calls 'IsolationForest::build_indexer' unless the model's indexer
        already holds entries, in which case nothing is done (regardless of
        'with_distances') and success is reported.
        If building throws a std::exception, the indexer's entries are
        cleared, the message is written to stderr and 'IsoTreeError' is
        returned. A NULL handle also yields 'IsoTreeError'. */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_build_indexer'." << std::endl;
        return IsoTreeError;
    }

    IsolationForest *forest = static_cast<IsolationForest*>(isotree_model);
    if (forest->indexer.indices.empty()) {
        try
        {
            forest->build_indexer(static_cast<bool>(with_distances));
        }
        catch (const std::exception &err)
        {
            /* do not leave a partially-built indexer behind */
            forest->indexer.indices.clear();
            cerr << err.what();
            cerr.flush();
            return IsoTreeError;
        }
    }

    return IsoTreeSuccess;
}
|
|
928
|
+
|
|
929
|
+
ISOTREE_EXPORTED
void* isotree_copy_model(void *isotree_model)
{
    /*  Creates a new heap-allocated 'IsolationForest', copy-assigns the
        given model into it, and hands ownership of the new object to the
        caller. Returns NULL on a NULL handle or when a std::exception is
        thrown (message goes to stderr). */
    if (isotree_model == nullptr) {
        cerr << "Passed NULL 'isotree_model' to 'isotree_copy_model'." << std::endl;
        return nullptr;
    }

    IsolationForest *source = static_cast<IsolationForest*>(isotree_model);
    try
    {
        std::unique_ptr<IsolationForest> duplicate(new IsolationForest());
        *duplicate = *source;
        return duplicate.release();
    }
    catch (const std::exception &err)
    {
        cerr << err.what();
        cerr.flush();
    }

    /* only reached after a caught exception */
    return nullptr;
}
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
} /* extern "C" */
|
|
952
|
+
|
|
953
|
+
#endif
|