isotree 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,929 @@
1
+ /* Isolation forests and variations thereof, with adjustments for incorporation
2
+ * of categorical variables and missing values.
3
+ * Written for C++11 standard and aimed at being used in R and Python.
4
+ *
5
+ * This library is based on the following works:
6
+ * [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
7
+ * "Isolation forest."
8
+ * 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008.
9
+ * [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
10
+ * "Isolation-based anomaly detection."
11
+ * ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3.
12
+ * [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner.
13
+ * "Extended Isolation Forest."
14
+ * arXiv preprint arXiv:1811.02141 (2018).
15
+ * [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou.
16
+ * "On detecting clustered anomalies using SCiForest."
17
+ * Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010.
18
+ * [5] https://sourceforge.net/projects/iforest/
19
+ * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree
20
+ * [7] Quinlan, J. Ross. C4. 5: programs for machine learning. Elsevier, 2014.
21
+ * [8] Cortes, David. "Distance approximation using Isolation Forests." arXiv preprint arXiv:1910.12362 (2019).
22
+ * [9] Cortes, David. "Imputing missing values with unsupervised random trees." arXiv preprint arXiv:1911.06646 (2019).
23
+ *
24
+ * BSD 2-Clause License
25
+ * Copyright (c) 2019, David Cortes
26
+ * All rights reserved.
27
+ * Redistribution and use in source and binary forms, with or without
28
+ * modification, are permitted provided that the following conditions are met:
29
+ * * Redistributions of source code must retain the above copyright notice, this
30
+ * list of conditions and the following disclaimer.
31
+ * * Redistributions in binary form must reproduce the above copyright notice,
32
+ * this list of conditions and the following disclaimer in the documentation
33
+ * and/or other materials provided with the distribution.
34
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
35
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
36
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
37
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
38
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
39
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
40
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
41
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
42
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
43
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
44
+ */
45
+
46
+ /* Standard headers */
47
+ #include <stddef.h>
48
+ #include <math.h>
49
+ #include <limits.h>
50
+ #include <string.h>
51
+ #include <signal.h>
52
+ #include <vector>
53
+ #include <iterator>
54
+ #include <numeric>
55
+ #include <algorithm>
56
+ #include <random>
57
+ #include <unordered_set>
58
+ #include <unordered_map>
59
+ #include <memory>
60
+ #include <utility>
61
+ #include <cstdint>
62
+ #include <iostream>
63
+ #ifndef _FOR_R
64
+ #include <stdio.h>
65
+ #else
66
+ extern "C" {
67
+ #include <R_ext/Print.h>
68
+ }
69
+ #define printf Rprintf
70
+ #define fprintf(f, ...) REprintf(__VA_ARGS__) /* R builds: the stream argument is dropped and the message plus any format arguments are forwarded to REprintf; variadic so that calls carrying format arguments also preprocess */
71
+ #endif
72
+ #ifdef _OPENMP
73
+ #include <omp.h>
74
+ #endif
75
+ #ifdef _ENABLE_CEREAL
76
+ #include <cereal/archives/binary.hpp>
77
+ #include <cereal/types/vector.hpp>
78
+ #include <sstream>
79
+ #include <string>
80
+ #include <fstream>
81
+ #endif
82
+
83
+ /* Uses Mersenne-Twister for RNG when _USE_MERSENNE_TWISTER is defined (64-bit variant on 64-bit systems); otherwise falls back to std::default_random_engine, which can be faster */
84
+ #ifdef _USE_MERSENNE_TWISTER
85
+ #if SIZE_MAX >= UINT64_MAX /* 64-bit systems or higher */
86
+ #define RNG_engine std::mt19937_64
87
+ #else /* 32-bit systems and non-standard architectures */
88
+ #define RNG_engine std::mt19937
89
+ #endif
90
+ #else
91
+ #define RNG_engine std::default_random_engine
92
+ #endif
93
+
94
+ /* Short functions */
95
+ #define ix_parent(ix) (((ix) - 1) / 2) /* parent of node 'ix' when the children of node i sit at 2*i+1 and 2*i+2; integer division takes care of deciding left-right. Not meaningful for ix == 0 (an unsigned argument wraps around) */
96
+ #define ix_child(ix) (2 * (ix) + 1) /* first of the two children of node 'ix' under the same numbering; the other one is at ix_child(ix) + 1 */
97
+ /* https://stackoverflow.com/questions/101439/the-most-efficient-way-to-implement-an-integer-based-power-function-powint-int */
98
+ #define pow2(n) ( ((size_t) 1) << (n) ) /* 2^n as a size_t; 'n' must be smaller than the bit width of size_t */
99
+ #define square(x) ((x) * (x)) /* note: 'x' is evaluated twice, so avoid arguments with side effects */
100
+ /* https://stackoverflow.com/questions/2249731/how-do-i-get-bit-by-bit-data-from-an-integer-value-in-c */
101
+ #define extract_bit(number, bit) (((number) >> (bit)) & 1) /* 0 or 1: value of bit number 'bit' of 'number', counting from the least-significant bit */
102
+ #ifndef isinf /* only when 'isinf' is not already available as a macro */
103
+ #define isinf std::isinf
104
+ #endif
105
+ #ifndef isnan /* only when 'isnan' is not already available as a macro */
106
+ #define isnan std::isnan
107
+ #endif
108
+ #define is_na_or_inf(x) (isnan(x) || isinf(x)) /* true for NaN and for infinite values; 'x' may be evaluated twice */
109
+
110
+
111
+ /* Aliasing for compiler optimizations */
112
+ #if defined(__GNUG__) || defined(__GNUC__) || defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER)
113
+ #define restrict __restrict
114
+ #else
115
+ #define restrict
116
+ #endif
117
+
118
+ /* MSVC is stuck with an OpenMP version that's 19 years old at the time of writing and does not support unsigned iterators */
119
+ #ifdef _OPENMP
120
+ #if (_OPENMP < 200801) || defined(_WIN32) || defined(_WIN64) /* OpenMP < 3.0 */
121
+ #define size_t_for long
122
+ #else
123
+ #define size_t_for size_t
124
+ #endif
125
+ #else
126
+ #define size_t_for size_t
127
+ #endif
128
+
129
+
130
+ /* Apple at some point decided to drop OMP library and headers from its compiler distribution
131
+ * and to alias 'gcc' to 'clang', which work differently when given flags they cannot interpret,
132
+ * causing installation issues with pretty much all scientific software due to OMP headers that
133
+ * would normally do nothing. This piece of code is to allow compilation without OMP header. */
134
+ #ifndef _OPENMP
135
+ #define omp_get_thread_num() 0
136
+ #endif
137
+
138
+
139
+ /* For sparse matrices */
140
+ #ifdef _FOR_R
141
+ #define sparse_ix int
142
+ #else
143
+ #define sparse_ix size_t
144
+ #endif
145
+
146
+
147
+ /* Types used through the package */
148
+ typedef enum NewCategAction {Weighted, Smallest, Random} NewCategAction; /* Weighted means Impute in the extended model */
149
+ typedef enum MissingAction {Divide, Impute, Fail} MissingAction; /* Divide is only for non-extended model */
150
+ typedef enum ColType {Numeric, Categorical, NotUsed} ColType; /* kind of column a split refers to; NotUsed is the default value of IsoTree::col_type */
151
+ typedef enum CategSplit {SubSet, SingleCateg} CategSplit; /* presumably: split on a subset of categories vs. on one single category - confirm against the splitting code */
152
+ typedef enum GainCriterion {Averaged, Pooled, NoCrit} Criterion; /* For guided splits. Note: the typedef name is 'Criterion' while the enum tag is 'GainCriterion'; both name the same type */
153
+ typedef enum CoefType {Uniform, Normal} CoefType; /* For extended model */
154
+ typedef enum UseDepthImp {Lower, Higher, Same} UseDepthImp; /* For NA imputation */
155
+ typedef enum WeighImpRows {Inverse, Prop, Flat} WeighImpRows; /* For NA imputation */
156
+
157
+ /* Notes about new categorical action:
158
+ * - For single-variable case, if using 'Smallest', can then pass data at prediction time
159
+ * having categories that were never in the training data (as an integer higher than 'ncat'
160
+ * for that column), but if using 'Random' or 'Weighted', these must be passed as NA (int < 0)
161
+ * - For extended case, 'Weighted' becomes a weighted imputation instead, and if using either
162
+ * 'Weighted' or 'Smallest', can pass newer, unseen categories at prediction time too.
163
+ * - If using 'Random', cannot pass new categories at prediction time.
164
+ * - If using 'Weighted' for single-variable case, cannot predict similarity with a value
165
+ * for MissingAction other than 'Divide'. */
166
+
167
+
168
+ /* Structs that are output (modified) from the main function */
169
+ typedef struct IsoTree { /* one node of a single-variable model; IsoForest::trees holds vectors of these */
170
+ ColType col_type = NotUsed; /* issues with uninitialized values passed to Cereal */
171
+ size_t col_num; /* column used by the split (no default value) */
172
+ double num_split; /* presumably the threshold when the split column is Numeric - confirm */
173
+ std::vector<char> cat_split; /* presumably one flag per category for SubSet splits - confirm */
174
+ int chosen_cat; /* presumably the category for SingleCateg splits - confirm */
175
+ size_t tree_left; /* presumably the position of the left child within the same vector of nodes - confirm */
176
+ size_t tree_right; /* presumably the position of the right child within the same vector of nodes - confirm */
177
+ double pct_tree_left;
178
+ double score; /* will not be integer when there are weights or early stop */
179
+ double range_low = -HUGE_VAL; /* defaults to an unbounded range: (-inf, +inf) */
180
+ double range_high = HUGE_VAL;
181
+ double remainder; /* only used for distance/similarity */
182
+
183
+ #ifdef _ENABLE_CEREAL /* serialization support is compiled in only on request */
184
+ template<class Archive>
185
+ void serialize(Archive &archive) /* passes every field to the archive, in declaration order */
186
+ {
187
+ archive(
188
+ this->col_type,
189
+ this->col_num,
190
+ this->num_split,
191
+ this->cat_split,
192
+ this->chosen_cat,
193
+ this->tree_left,
194
+ this->tree_right,
195
+ this->pct_tree_left,
196
+ this->score,
197
+ this->range_low,
198
+ this->range_high,
199
+ this->remainder
200
+ );
201
+ }
202
+ #endif
203
+
204
+ IsoTree() = default; /* fields without an initializer above are left uninitialized */
205
+
206
+ } IsoTree;
207
+
208
+ typedef struct IsoHPlane { /* one node of an extended model; ExtIsoForest::hplanes holds vectors of these */
208
+ std::vector<size_t> col_num; /* columns taking part in the split; the vectors below are presumably aligned with it - confirm */
209
+ std::vector<ColType> col_type;
210
+ std::vector<double> coef;
211
+ std::vector<double> mean;
212
+ std::vector<std::vector<double>> cat_coef;
213
+ std::vector<int> chosen_cat;
214
+ std::vector<double> fill_val; /* presumably the value used in place of missing entries - confirm */
215
+ std::vector<double> fill_new; /* presumably the value used for categories not seen when fitting - confirm */
216
+
217
+ double split_point;
218
+ size_t hplane_left; /* presumably the position of the left child within the same vector of nodes - confirm */
219
+ size_t hplane_right; /* presumably the position of the right child within the same vector of nodes - confirm */
220
+ double score; /* will not be integer when there are weights or early stop */
221
+ double range_low = -HUGE_VAL; /* defaults to an unbounded range: (-inf, +inf) */
222
+ double range_high = HUGE_VAL;
223
+ double remainder; /* only used for distance/similarity */
224
+
225
+ #ifdef _ENABLE_CEREAL /* serialization support is compiled in only on request */
226
+ template<class Archive>
227
+ void serialize(Archive &archive) /* passes every field to the archive, in declaration order */
228
+ {
229
+ archive(
230
+ this->col_num,
231
+ this->col_type,
232
+ this->coef,
233
+ this->mean,
234
+ this->cat_coef,
235
+ this->chosen_cat,
236
+ this->fill_val,
237
+ this->fill_new,
238
+ this->split_point,
239
+ this->hplane_left,
240
+ this->hplane_right,
241
+ this->score,
242
+ this->range_low,
243
+ this->range_high,
244
+ this->remainder
245
+ );
246
+ }
247
+ #endif
248
+
249
+ IsoHPlane() = default; /* scalar fields without an initializer above are left uninitialized */
250
+ } IsoHPlane;
252
+
253
+ /* Note: don't use long doubles in the outside outputs or there will be issues with MINGW in windows */
254
+
255
+
256
+ typedef struct IsoForest { /* single-variable model: the trees plus the settings stored alongside them */
256
+ std::vector< std::vector<IsoTree> > trees; /* outer vector: one entry per tree; inner vector: the nodes of that tree */
257
+ NewCategAction new_cat_action;
258
+ CategSplit cat_split_type;
259
+ MissingAction missing_action;
260
+ double exp_avg_depth;
261
+ double exp_avg_sep;
262
+ size_t orig_sample_size;
263
+
264
+ #ifdef _ENABLE_CEREAL /* serialization support is compiled in only on request */
265
+ template<class Archive>
266
+ void serialize(Archive &archive) /* passes every field to the archive, in declaration order */
267
+ {
268
+ archive(
269
+ this->trees,
270
+ this->new_cat_action,
271
+ this->cat_split_type,
272
+ this->missing_action,
273
+ this->exp_avg_depth,
274
+ this->exp_avg_sep,
275
+ this->orig_sample_size
276
+ );
277
+ }
278
+ #endif
279
+
280
+ IsoForest() = default; /* no member has an initializer: everything except 'trees' starts uninitialized */
281
+ } IsoForest;
283
+
284
+ typedef struct ExtIsoForest { /* extended model: same layout as IsoForest, with IsoHPlane nodes instead of IsoTree nodes */
284
+ std::vector< std::vector<IsoHPlane> > hplanes; /* outer vector: one entry per tree; inner vector: the nodes of that tree */
285
+ NewCategAction new_cat_action;
286
+ CategSplit cat_split_type;
287
+ MissingAction missing_action;
288
+ double exp_avg_depth;
289
+ double exp_avg_sep;
290
+ size_t orig_sample_size;
291
+
292
+ #ifdef _ENABLE_CEREAL /* serialization support is compiled in only on request */
293
+ template<class Archive>
294
+ void serialize(Archive &archive) /* passes every field to the archive, in declaration order */
295
+ {
296
+ archive(
297
+ this->hplanes,
298
+ this->new_cat_action,
299
+ this->cat_split_type,
300
+ this->missing_action,
301
+ this->exp_avg_depth,
302
+ this->exp_avg_sep,
303
+ this->orig_sample_size
304
+ );
305
+ }
306
+ #endif
307
+
308
+ ExtIsoForest() = default; /* no member has an initializer: everything except 'hplanes' starts uninitialized */
309
+ } ExtIsoForest;
311
+
312
+ typedef struct ImputeNode { /* imputation statistics attached to one tree node */
312
+ std::vector<double> num_sum; /* presumably one entry per numeric column - confirm */
313
+ std::vector<double> num_weight;
314
+ std::vector<std::vector<double>> cat_sum; /* presumably one inner vector per categorical column - confirm */
315
+ std::vector<double> cat_weight;
316
+ size_t parent; /* presumably the position of the parent node within the same vector - confirm */
317
+
318
+ #ifdef _ENABLE_CEREAL /* serialization support is compiled in only on request */
319
+ template<class Archive>
320
+ void serialize(Archive &archive) /* passes every field to the archive, in declaration order */
321
+ {
322
+ archive(
323
+ this->num_sum,
324
+ this->num_weight,
325
+ this->cat_sum,
326
+ this->cat_weight,
327
+ this->parent
328
+ );
329
+ }
330
+ #endif
331
+ ImputeNode() = default; /* leaves 'parent' uninitialized */
332
+
333
+ ImputeNode(size_t parent) /* sets only 'parent'; the vectors start empty. Not 'explicit', so a size_t converts implicitly */
334
+ {
335
+ this->parent = parent;
336
+ }
337
+
338
+ } ImputeNode; /* this is for each tree node */
340
+
341
+ typedef struct Imputer { /* data kept for imputing missing values: per-tree ImputeNode vectors plus per-column summaries */
341
+ size_t ncols_numeric;
342
+ size_t ncols_categ;
343
+ std::vector<int> ncat; /* presumably the number of categories of each categorical column - confirm */
344
+ std::vector<std::vector<ImputeNode>> imputer_tree; /* outer vector: presumably one entry per tree - confirm */
345
+ std::vector<double> col_means; /* presumably per numeric column - confirm */
346
+ std::vector<int> col_modes; /* presumably per categorical column - confirm */
347
+
348
+ #ifdef _ENABLE_CEREAL /* serialization support is compiled in only on request */
349
+ template<class Archive>
350
+ void serialize(Archive &archive) /* passes every field to the archive, in declaration order */
351
+ {
352
+ archive(
353
+ this->ncols_numeric,
354
+ this->ncols_categ,
355
+ this->ncat,
356
+ this->imputer_tree,
357
+ this->col_means,
358
+ this->col_modes
359
+ );
360
+ }
361
+ #endif
362
+
363
+ Imputer() = default; /* leaves the two column counts uninitialized */
364
+
365
+ } Imputer;
367
+
368
+
369
+ /* Structs that are only used internally */
370
+ typedef struct { /* view over the fitting data; the struct has no destructor, so it never frees the pointers it holds */
370
+ double* numeric_data;
371
+ size_t ncols_numeric;
372
+ int* categ_data;
373
+ int* ncat; /* presumably the number of categories of each categorical column - confirm */
374
+ int max_categ;
375
+ size_t ncols_categ;
376
+ size_t nrows;
377
+ size_t ncols_tot;
378
+ double* sample_weights;
379
+ bool weight_as_sample;
380
+ double* col_weights;
381
+ double* Xc; /* only for sparse matrices */
382
+ sparse_ix* Xc_ind; /* only for sparse matrices */
383
+ sparse_ix* Xc_indptr; /* only for sparse matrices */
384
+ size_t log2_n; /* only when using weights for sampling */
385
+ size_t btree_offset; /* only when using weights for sampling */
386
+ std::vector<double> btree_weights_init; /* only when using weights for sampling */
387
+ std::vector<char> has_missing; /* only used when producing missing imputations on-the-fly */
388
+ size_t n_missing; /* only used when producing missing imputations on-the-fly */
389
+ } InputData;
391
+
392
+
393
+ typedef struct { /* view over the data passed at prediction time; the struct has no destructor, so it never frees these pointers */
393
+ double* numeric_data;
394
+ int* categ_data;
395
+ size_t nrows;
396
+ double* Xc; /* only for sparse matrices */
397
+ sparse_ix* Xc_ind; /* only for sparse matrices */
398
+ sparse_ix* Xc_indptr; /* only for sparse matrices */
399
+ double* Xr; /* only for sparse matrices */
400
+ sparse_ix* Xr_ind; /* only for sparse matrices */
401
+ sparse_ix* Xr_indptr; /* only for sparse matrices */
402
+ } PredictionData; /* presumably Xc* is a column-compressed and Xr* a row-compressed layout - confirm */
404
+
405
+ typedef struct { /* fitting settings bundled in one struct; most fields share a name with a fit_iforest() parameter */
405
+ bool with_replacement;
406
+ size_t sample_size;
407
+ size_t ntrees;
408
+ size_t max_depth;
409
+ bool penalize_range;
410
+ uint64_t random_seed;
411
+ bool weigh_by_kurt;
412
+ double prob_pick_by_gain_avg;
413
+ double prob_split_by_gain_avg;
414
+ double prob_pick_by_gain_pl;
415
+ double prob_split_by_gain_pl;
416
+ double min_gain;
417
+ CategSplit cat_split_type;
418
+ NewCategAction new_cat_action;
419
+ MissingAction missing_action;
420
+ bool all_perm;
421
+
422
+ size_t ndim; /* only for extended model */
423
+ size_t ntry; /* only for extended model */
424
+ CoefType coef_type; /* only for extended model */
425
+ bool coef_by_prop; /* only for extended model */
426
+
427
+ bool calc_dist; /* checkbox for calculating distances on-the-fly */
428
+ bool calc_depth; /* checkbox for calculating depths on-the-fly */
429
+ bool impute_at_fit; /* checkbox for producing imputed missing values on-the-fly */
430
+
431
+ UseDepthImp depth_imp; /* only when building NA imputer */
432
+ WeighImpRows weigh_imp_rows; /* only when building NA imputer */
433
+ size_t min_imp_obs; /* only when building NA imputer */
434
+ } ModelParams;
436
+
437
+ typedef struct ImputedData { /* imputation accumulators for one row; kept in 'long double', unlike the 'double' vectors of ImputeNode */
437
+ std::vector<long double> num_sum;
438
+ std::vector<long double> num_weight;
439
+ std::vector<std::vector<long double>> cat_sum;
440
+ std::vector<long double> cat_weight;
441
+ std::vector<long double> sp_num_sum; /* 'sp_' presumably refers to sparse-matrix input - confirm */
442
+ std::vector<long double> sp_num_weight;
443
+
444
+ std::vector<size_t> missing_num; /* presumably the numeric columns that are missing in this row - confirm */
445
+ std::vector<size_t> missing_cat; /* presumably the categorical columns that are missing in this row - confirm */
446
+ std::vector<sparse_ix> missing_sp;
447
+ size_t n_missing_num;
448
+ size_t n_missing_cat;
449
+ size_t n_missing_sp;
450
+
451
+ ImputedData() {}; /* empty user-provided constructor: the three n_missing_* counters are left uninitialized */
452
+
453
+ ImputedData(InputData &input_data, size_t row); /* declared only; the definition is not in this header */
454
+
455
+ } ImputedData;
457
+
458
+ typedef struct { /* scratch space used while fitting; see the col_sampler comment below about per-thread copies */
459
+ std::vector<size_t> ix_arr; /* presumably the row indices being worked on, delimited by st/end below - confirm */
460
+ std::vector<size_t> ix_all;
461
+ RNG_engine rnd_generator; /* engine type is selected by the RNG_engine macro */
462
+ std::uniform_int_distribution<size_t> runif;
463
+ std::uniform_real_distribution<double> rbin;
464
+ size_t st;
465
+ size_t end;
466
+ size_t st_NA;
467
+ size_t end_NA;
468
+ size_t split_ix;
469
+ std::unordered_map<size_t, double> weights_map;
470
+ std::vector<double> weights_arr; /* when not ignoring NAs and when using weights as density */
471
+ double xmin;
472
+ double xmax;
473
+ size_t npresent; /* 'npresent' and 'ncols_tried' are used interchangeably and for unrelated things */
474
+ bool unsplittable;
475
+ std::vector<bool> is_repeated;
476
+ std::vector<char> categs;
477
+ size_t ncols_tried; /* 'npresent' and 'ncols_tried' are used interchangeably and for unrelated things */
478
+ int ncat_tried;
479
+ std::vector<bool> cols_possible;
480
+ std::vector<double> btree_weights; /* only when using weights for sampling */
481
+ std::discrete_distribution<size_t> col_sampler; /* columns can get eliminated, keep a copy for each thread */
482
+
483
+ /* for split criterion */
484
+ std::vector<double> buffer_dbl;
485
+ std::vector<size_t> buffer_szt;
486
+ std::vector<char> buffer_chr;
487
+ double prob_split_type;
488
+ GainCriterion criterion; /* same type as the 'Criterion' typedef */
489
+ double this_gain;
490
+ double this_split_point;
491
+ int this_categ;
492
+ std::vector<char> this_split_categ;
493
+ bool determine_split;
494
+
495
+ /* for the extended model */
496
+ size_t ntry;
497
+ size_t ntaken;
498
+ size_t ntaken_best;
499
+ bool tried_all;
500
+ size_t col_chosen;
501
+ ColType col_type;
502
+ double ext_sd;
503
+ std::vector<size_t> cols_shuffled;
504
+ std::vector<double> comb_val;
505
+ std::vector<size_t> col_take;
506
+ std::vector<ColType> col_take_type;
507
+ std::vector<double> ext_offset;
508
+ std::vector<double> ext_coef;
509
+ std::vector<double> ext_mean;
510
+ std::vector<double> ext_fill_val;
511
+ std::vector<double> ext_fill_new;
512
+ std::vector<int> chosen_cat;
513
+ std::vector<std::vector<double>> ext_cat_coef;
514
+ std::uniform_real_distribution<double> coef_unif;
515
+ std::normal_distribution<double> coef_norm;
516
+
517
+ /* for similarity/distance calculations */
518
+ std::vector<double> tmat_sep;
519
+
520
+ /* when calculating average depth on-the-fly */
521
+ std::vector<double> row_depths;
522
+
523
+ /* when imputing NAs on-the-fly */
524
+ std::vector<ImputedData> impute_vec;
525
+ std::unordered_map<size_t, ImputedData> impute_map;
526
+
527
+ } WorkerMemory;
528
+
529
+ typedef struct WorkerForSimilarity { /* scratch space for the similarity/distance routines declared under dist.cpp */
529
+ std::vector<size_t> ix_arr;
530
+ size_t st;
531
+ size_t end;
532
+ std::vector<double> weights_arr;
533
+ std::vector<double> comb_val;
534
+ std::vector<double> tmat_sep;
535
+ std::vector<double> rmat;
536
+ size_t n_from; /* same name as the 'n_from' argument of calc_similarity() */
537
+ bool assume_full_distr; /* doesn't need to have one copy per worker */
538
+ } WorkerForSimilarity;
540
+
541
+ typedef struct { /* every field has a same-named, same-typed counterpart in WorkerMemory; see backup_recursion_state() / restore_recursion_state() */
541
+ size_t st;
542
+ size_t st_NA;
543
+ size_t end_NA;
544
+ size_t split_ix;
545
+ size_t end;
546
+ std::vector<size_t> ix_arr;
547
+ std::unordered_map<size_t, double> weights_map;
548
+ std::vector<double> weights_arr;
549
+ std::vector<bool> cols_possible;
550
+ std::discrete_distribution<size_t> col_sampler;
551
+ } RecursionState; /* presumably saved before descending into one branch and restored before the other - confirm */
553
+
554
+ /* Function prototypes */
555
+
556
+ /* fit_model.cpp */
557
+ extern bool interrupt_switch; /* declared only; presumably set from a signal handler (<signal.h> is included above) - confirm */
558
+ int fit_iforest(IsoForest *model_outputs, ExtIsoForest *model_outputs_ext, /* takes a pointer for each model kind; presumably only one is used per call - confirm. Returns int, presumably a status code - confirm */
559
+ double numeric_data[], size_t ncols_numeric,
560
+ int categ_data[], size_t ncols_categ, int ncat[],
561
+ double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[], /* sparse input; element type of the index arrays depends on the sparse_ix macro */
562
+ size_t ndim, size_t ntry, CoefType coef_type, bool coef_by_prop, /* same names as the ModelParams fields marked 'only for extended model' */
563
+ double sample_weights[], bool with_replacement, bool weight_as_sample,
564
+ size_t nrows, size_t sample_size, size_t ntrees, size_t max_depth,
565
+ bool limit_depth, bool penalize_range,
566
+ bool standardize_dist, double tmat[],
567
+ double output_depths[], bool standardize_depth,
568
+ double col_weights[], bool weigh_by_kurt,
569
+ double prob_pick_by_gain_avg, double prob_split_by_gain_avg,
570
+ double prob_pick_by_gain_pl, double prob_split_by_gain_pl,
571
+ double min_gain, MissingAction missing_action,
572
+ CategSplit cat_split_type, NewCategAction new_cat_action,
573
+ bool all_perm, Imputer *imputer, size_t min_imp_obs,
574
+ UseDepthImp depth_imp, WeighImpRows weigh_imp_rows, bool impute_at_fit,
575
+ uint64_t random_seed, int nthreads);
576
+ int add_tree(IsoForest *model_outputs, ExtIsoForest *model_outputs_ext, /* parameter list is a subset of fit_iforest's, with a vector of ImputeNode in place of the Imputer; no sampling, thread-count or on-the-fly output arguments */
577
+ double numeric_data[], size_t ncols_numeric,
578
+ int categ_data[], size_t ncols_categ, int ncat[],
579
+ double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
580
+ size_t ndim, size_t ntry, CoefType coef_type, bool coef_by_prop,
581
+ double sample_weights[], size_t nrows, size_t max_depth,
582
+ bool limit_depth, bool penalize_range,
583
+ double col_weights[], bool weigh_by_kurt,
584
+ double prob_pick_by_gain_avg, double prob_split_by_gain_avg,
585
+ double prob_pick_by_gain_pl, double prob_split_by_gain_pl,
586
+ double min_gain, MissingAction missing_action,
587
+ CategSplit cat_split_type, NewCategAction new_cat_action,
588
+ UseDepthImp depth_imp, WeighImpRows weigh_imp_rows,
589
+ bool all_perm, std::vector<ImputeNode> *impute_nodes, size_t min_imp_obs,
590
+ uint64_t random_seed);
591
+ void fit_itree(std::vector<IsoTree> *tree_root, /* takes a node vector for each model kind; presumably only one is non-NULL - confirm */
592
+ std::vector<IsoHPlane> *hplane_root,
593
+ WorkerMemory &workspace,
594
+ InputData &input_data,
595
+ ModelParams &model_params,
596
+ std::vector<ImputeNode> *impute_nodes,
597
+ size_t tree_num);
598
+
599
+ /* isoforest.cpp */
600
+ void split_itree_recursive(std::vector<IsoTree> &trees, /* single-variable counterpart of split_hplane_recursive(): same parameters except for the node type */
600
+ WorkerMemory &workspace,
601
+ InputData &input_data,
602
+ ModelParams &model_params,
603
+ std::vector<ImputeNode> *impute_nodes, /* pointer rather than reference; presumably may be NULL when no imputer is built - confirm */
604
+ size_t curr_depth);
606
+
607
+ /* extended.cpp */
608
+ void split_hplane_recursive(std::vector<IsoHPlane> &hplanes, /* extended-model counterpart of split_itree_recursive(): same parameters except for the node type */
608
+ WorkerMemory &workspace,
609
+ InputData &input_data,
610
+ ModelParams &model_params,
611
+ std::vector<ImputeNode> *impute_nodes, /* pointer rather than reference; presumably may be NULL when no imputer is built - confirm */
612
+ size_t curr_depth);
613
+ void add_chosen_column(WorkerMemory &workspace, InputData &input_data, ModelParams &model_params,
614
+ std::vector<bool> &col_is_taken, std::unordered_set<size_t> &col_is_taken_s); /* same pair of 'taken' containers as is_col_taken() / set_col_as_taken() */
615
+ void shrink_to_fit_hplane(IsoHPlane &hplane, bool clear_vectors);
616
+ void simplify_hplane(IsoHPlane &hplane, WorkerMemory &workspace, InputData &input_data, ModelParams &model_params);
618
+
619
+
620
+ /* predict.cpp */
621
+ void predict_iforest(double numeric_data[], int categ_data[], /* the data arguments have the same names as the PredictionData fields */
621
+ double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
622
+ double Xr[], sparse_ix Xr_ind[], sparse_ix Xr_indptr[],
623
+ size_t nrows, int nthreads, bool standardize,
624
+ IsoForest *model_outputs, ExtIsoForest *model_outputs_ext, /* a pointer for each model kind; presumably only one is used per call - confirm */
625
+ double output_depths[], sparse_ix tree_num[]); /* both are written by the callee, judging by the names - confirm sizes against the definition */
626
+ void traverse_itree_no_recurse(std::vector<IsoTree> &tree, /* result is delivered through the 'output_depth' reference */
627
+ IsoForest &model_outputs,
628
+ PredictionData &prediction_data,
629
+ double &output_depth,
630
+ sparse_ix *restrict tree_num,
631
+ size_t row);
632
+ double traverse_itree(std::vector<IsoTree> &tree, /* unlike traverse_itree_no_recurse(), returns a double and takes imputation arguments, a weight and a level */
633
+ IsoForest &model_outputs,
634
+ PredictionData &prediction_data,
635
+ std::vector<ImputeNode> *impute_nodes,
636
+ ImputedData *imputed_data,
637
+ double curr_weight,
638
+ size_t row,
639
+ sparse_ix *restrict tree_num,
640
+ size_t curr_lev);
641
+ void traverse_hplane_fast(std::vector<IsoHPlane> &hplane, /* extended-model traversal without imputation arguments */
642
+ ExtIsoForest &model_outputs,
643
+ PredictionData &prediction_data,
644
+ double &output_depth,
645
+ sparse_ix *restrict tree_num,
646
+ size_t row);
647
+ void traverse_hplane(std::vector<IsoHPlane> &hplane, /* same as traverse_hplane_fast() plus the two imputation arguments */
648
+ ExtIsoForest &model_outputs,
649
+ PredictionData &prediction_data,
650
+ double &output_depth,
651
+ std::vector<ImputeNode> *impute_nodes,
652
+ ImputedData *imputed_data,
653
+ sparse_ix *restrict tree_num,
654
+ size_t row);
655
+ double extract_spC(PredictionData &prediction_data, size_t row, size_t col_num); /* presumably reads one value from the Xc* arrays - confirm */
656
+ double extract_spR(PredictionData &prediction_data, sparse_ix *row_st, sparse_ix *row_end, size_t col_num); /* presumably reads one value from the Xr* arrays - confirm */
657
+ void get_num_nodes(IsoForest &model_outputs, sparse_ix *restrict n_nodes, sparse_ix *restrict n_terminal, int nthreads); /* overload for the single-variable model */
658
+ void get_num_nodes(ExtIsoForest &model_outputs, sparse_ix *restrict n_nodes, sparse_ix *restrict n_terminal, int nthreads); /* overload for the extended model */
660
+
661
+ /* dist.cpp */
662
+ void calc_similarity(double numeric_data[], int categ_data[], /* takes only the column-wise sparse arrays (Xc*), no Xr* */
662
+ double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
663
+ size_t nrows, int nthreads, bool assume_full_distr, bool standardize_dist,
664
+ IsoForest *model_outputs, ExtIsoForest *model_outputs_ext, /* a pointer for each model kind; presumably only one is used per call - confirm */
665
+ double tmat[], double rmat[], size_t n_from); /* 'tmat' and 'rmat' are presumably alternative outputs - confirm against the definition */
666
+ void traverse_tree_sim(WorkerForSimilarity &workspace, /* single-variable version */
667
+ PredictionData &prediction_data,
668
+ IsoForest &model_outputs,
669
+ std::vector<IsoTree> &trees,
670
+ size_t curr_tree);
671
+ void traverse_hplane_sim(WorkerForSimilarity &workspace, /* extended-model version */
672
+ PredictionData &prediction_data,
673
+ ExtIsoForest &model_outputs,
674
+ std::vector<IsoHPlane> &hplanes,
675
+ size_t curr_tree);
676
+ void gather_sim_result(std::vector<WorkerForSimilarity> *worker_memory, /* accepts workers of either kind (WorkerForSimilarity or WorkerMemory) and data of either kind (PredictionData or InputData), all by pointer */
677
+ std::vector<WorkerMemory> *worker_memory_m,
678
+ PredictionData *prediction_data, InputData *input_data,
679
+ IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
680
+ double *restrict tmat, double *restrict rmat, size_t n_from,
681
+ size_t ntrees, bool assume_full_distr,
682
+ bool standardize_dist, int nthreads);
683
+ void initialize_worker_for_sim(WorkerForSimilarity &workspace,
684
+ PredictionData &prediction_data,
685
+ IsoForest *model_outputs,
686
+ ExtIsoForest *model_outputs_ext,
687
+ size_t n_from,
688
+ bool assume_full_distr);
690
+
691
+ /* impute.cpp */
692
+ void impute_missing_values(double numeric_data[], int categ_data[], /* takes only the row-wise sparse arrays (Xr*), no Xc*; the arrays are non-const, presumably because values are filled in place - confirm */
692
+ double Xr[], sparse_ix Xr_ind[], sparse_ix Xr_indptr[],
693
+ size_t nrows, int nthreads,
694
+ IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
695
+ Imputer &imputer);
696
+ void initialize_imputer(Imputer &imputer, InputData &input_data, size_t ntrees, int nthreads);
697
+ void build_impute_node(ImputeNode &imputer, WorkerMemory &workspace, /* note: the first parameter is named 'imputer' but is a single ImputeNode */
698
+ InputData &input_data, ModelParams &model_params,
699
+ std::vector<ImputeNode> &imputer_tree,
700
+ size_t curr_depth, size_t min_imp_obs);
701
+ void shrink_impute_node(ImputeNode &imputer);
702
+ void drop_nonterminal_imp_node(std::vector<ImputeNode> &imputer_tree,
703
+ std::vector<IsoTree> *trees, /* a node vector for each model kind; presumably only one is non-NULL - confirm */
704
+ std::vector<IsoHPlane> *hplanes);
705
+ void combine_imp_single(ImputedData &imp_addfrom, ImputedData &imp_addto);
706
+ void combine_tree_imputations(WorkerMemory &workspace,
707
+ std::vector<ImputedData> &impute_vec,
708
+ std::unordered_map<size_t, ImputedData> &impute_map,
709
+ std::vector<char> &has_missing,
710
+ int nthreads);
711
+ void add_from_impute_node(ImputeNode &imputer, ImputedData &imputed_data, double w); /* overload writing into one ImputedData with weight 'w' */
712
+ void add_from_impute_node(ImputeNode &imputer, WorkerMemory &workspace, InputData &input_data); /* overload working through the worker's own storage */
713
+ template <class imp_arr> /* 'imp_arr' is the container type of the imputation results */
714
+ void apply_imputation_results(imp_arr &impute_vec,
715
+ Imputer &imputer,
716
+ InputData &input_data,
717
+ int nthreads);
718
+ void apply_imputation_results(std::vector<ImputedData> &impute_vec, /* non-template overload taking both the vector and the map */
719
+ std::unordered_map<size_t, ImputedData> &impute_map,
720
+ Imputer &imputer,
721
+ InputData &input_data,
722
+ int nthreads);
723
+ void apply_imputation_results(PredictionData &prediction_data, /* overload for a single row of prediction data */
724
+ ImputedData &imp,
725
+ Imputer &imputer,
726
+ size_t row);
727
+ void initialize_impute_calc(ImputedData &imp, InputData &input_data, size_t row); /* fitting-data overload */
728
+ void initialize_impute_calc(ImputedData &imp, PredictionData &prediction_data, Imputer &imputer, size_t row); /* prediction-data overload */
729
+ void allocate_imp_vec(std::vector<ImputedData> &impute_vec, InputData &input_data, int nthreads);
730
+ void allocate_imp_map(std::unordered_map<size_t, ImputedData> &impute_map, InputData &input_data);
731
+ void allocate_imp(InputData &input_data,
732
+ std::vector<ImputedData> &impute_vec,
733
+ std::unordered_map<size_t, ImputedData> &impute_map,
734
+ int nthreads);
735
+ void check_for_missing(InputData &input_data, /* fitting-data overload; returns nothing */
736
+ std::vector<ImputedData> &impute_vec,
737
+ std::unordered_map<size_t, ImputedData> &impute_map,
738
+ int nthreads);
739
+ size_t check_for_missing(PredictionData &prediction_data, /* prediction-data overload; returns a size_t, presumably the number of rows with missing values written to 'ix_arr' - confirm */
740
+ Imputer &imputer,
741
+ size_t ix_arr[],
742
+ int nthreads);
744
+
745
+ /* helpers_iforest.cpp */
746
+ void decide_column(size_t ncols_numeric, size_t ncols_categ, size_t &col_chosen, ColType &col_type, /* results are delivered through the 'col_chosen' and 'col_type' references */
747
+ RNG_engine &rnd_generator, std::uniform_int_distribution<size_t> &runif,
748
+ std::discrete_distribution<size_t> &col_sampler);
749
+ void add_unsplittable_col(WorkerMemory &workspace, IsoTree &tree, InputData &input_data); /* overload taking the column from an IsoTree node */
750
+ void add_unsplittable_col(WorkerMemory &workspace, InputData &input_data); /* overload without a node; presumably uses the column stored in the workspace - confirm */
751
+ bool check_is_not_unsplittable_col(WorkerMemory &workspace, IsoTree &tree, InputData &input_data);
752
+ void get_split_range(WorkerMemory &workspace, InputData &input_data, ModelParams &model_params, IsoTree &tree); /* overload taking the column from an IsoTree node */
753
+ void get_split_range(WorkerMemory &workspace, InputData &input_data, ModelParams &model_params); /* overload without a node */
754
+ int choose_cat_from_present(WorkerMemory &workspace, InputData &input_data, size_t col_num);
755
+ void update_col_sampler(WorkerMemory &workspace, InputData &input_data);
756
+ bool is_col_taken(std::vector<bool> &col_is_taken, std::unordered_set<size_t> &col_is_taken_s, /* two alternative 'taken' containers: a flag vector and a set */
757
+ InputData &input_data, size_t col_num, ColType col_type);
758
+ void set_col_as_taken(std::vector<bool> &col_is_taken, std::unordered_set<size_t> &col_is_taken_s, /* same arguments as is_col_taken() */
759
+ InputData &input_data, size_t col_num, ColType col_type);
760
+ void add_separation_step(WorkerMemory &workspace, InputData &input_data, double remainder);
761
+ void add_remainder_separation_steps(WorkerMemory &workspace, InputData &input_data, long double sum_weight);
762
+ void remap_terminal_trees(IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
763
+ PredictionData &prediction_data, sparse_ix *restrict tree_num, int nthreads);
764
+ void backup_recursion_state(WorkerMemory &workspace, RecursionState &recursion_state); /* RecursionState has a field for each WorkerMemory member of the same name */
765
+ void restore_recursion_state(WorkerMemory &workspace, RecursionState &recursion_state); /* counterpart of backup_recursion_state() */
766
+
767
+
768
+ /* utils.cpp */
769
+ size_t log2ceil(size_t x);
770
+ double harmonic(size_t n);
771
+ double harmonic_recursive(double a, double b);
772
+ double expected_avg_depth(size_t sample_size);
773
+ double expected_avg_depth(long double approx_sample_size);
774
+ double expected_separation_depth(size_t n);
775
+ double expected_separation_depth_hotstart(double curr, size_t n_curr, size_t n_final);
776
+ double expected_separation_depth(long double n);
777
+ void increase_comb_counter(size_t ix_arr[], size_t st, size_t end, size_t n, double counter[], double exp_remainder); /* overload without weights */
778
+ void increase_comb_counter(size_t ix_arr[], size_t st, size_t end, size_t n,
779
+ double *restrict counter, double *restrict weights, double exp_remainder); /* overload with weights as a plain array; 'counter' and 'weights' are 'restrict', so they must not alias */
780
+ void increase_comb_counter(size_t ix_arr[], size_t st, size_t end, size_t n,
781
+ double counter[], std::unordered_map<size_t, double> &weights, double exp_remainder); /* overload with weights held in a hash map keyed by size_t (presumably a row index -- confirm) */
782
+ void increase_comb_counter_in_groups(size_t ix_arr[], size_t st, size_t end, size_t split_ix, size_t n,
783
+ double counter[], double exp_remainder); /* the '_in_groups' variants take an extra 'split_ix' argument; this one takes no weights */
784
+ void increase_comb_counter_in_groups(size_t ix_arr[], size_t st, size_t end, size_t split_ix, size_t n,
785
+ double *restrict counter, double *restrict weights, double exp_remainder); /* '_in_groups' variant with weights as a plain array; no hash-map overload of this variant is declared next to it */
786
+ void tmat_to_dense(double *restrict tmat, double *restrict dmat, size_t n, bool diag_to_one); /* presumably expands a triangular matrix 'tmat' into the dense n-by-n 'dmat' -- confirm; both buffers are 'restrict' and must not overlap */
787
+ double calc_sd_raw(size_t cnt, long double sum, long double sum_sq); /* takes a count, a sum and a sum of squares; returns double */
788
+ long double calc_sd_raw_l(size_t cnt, long double sum, long double sum_sq); /* same parameters as calc_sd_raw, but returns long double */
789
+ void build_btree_sampler(std::vector<double> &btree_weights, double *restrict sample_weights,
790
+ size_t nrows, size_t &log2_n, size_t &btree_offset); /* 'btree_weights', 'log2_n' and 'btree_offset' are non-const references, so presumably outputs -- confirm */
791
+ void sample_random_rows(std::vector<size_t> &ix_arr, size_t nrows, bool with_replacement,
792
+ RNG_engine &rnd_generator, std::vector<size_t> &ix_all,
793
+ double sample_weights[], std::vector<double> &btree_weights,
794
+ size_t log2_n, size_t btree_offset, std::vector<bool> &is_repeated); /* has parameters named like those of build_btree_sampler ('btree_weights', 'log2_n', 'btree_offset'); presumably consumes what that function produces -- confirm */
795
+ void weighted_shuffle(size_t *restrict outp, size_t n, double *restrict weights, double *restrict buffer_arr, RNG_engine &rnd_generator); /* all three pointer arguments are 'restrict' and must not alias one another */
796
+ size_t divide_subset_split(size_t ix_arr[], double x[], size_t st, size_t end, double split_point); /* 'double x[]' overload with no missing-value arguments; the only overload that returns size_t instead of taking 'split_ix' by reference */
797
+ void divide_subset_split(size_t ix_arr[], double x[], size_t st, size_t end, double split_point,
798
+ MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix); /* 'double x[]' overload with a MissingAction; 'st_NA', 'end_NA' and 'split_ix' are non-const references, presumably outputs -- confirm */
799
+ void divide_subset_split(size_t ix_arr[], size_t st, size_t end, size_t col_num,
800
+ double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[], double split_point,
801
+ MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix); /* overload taking the 'Xc' / 'Xc_ind' / 'Xc_indptr' triple plus a column number instead of a single 'x[]' array (presumably a column-compressed sparse layout -- confirm) */
802
+ void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end, char split_categ[],
803
+ MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix); /* 'int x[]' overload with a 'char split_categ[]' array (presumably one entry per category -- confirm) */
804
+ void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end, char split_categ[],
805
+ int ncat, MissingAction missing_action, NewCategAction new_cat_action,
806
+ bool move_new_to_left, size_t &st_NA, size_t &end_NA, size_t &split_ix); /* as the previous overload, plus 'ncat', a NewCategAction and 'move_new_to_left' */
807
+ void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end, int split_categ,
808
+ MissingAction missing_action, size_t &st_NA, size_t &end_NA, size_t &split_ix); /* 'int x[]' overload with a single 'int split_categ' rather than an array */
809
+ void divide_subset_split(size_t ix_arr[], int x[], size_t st, size_t end,
810
+ MissingAction missing_action, NewCategAction new_cat_action,
811
+ bool move_new_to_left, size_t &st_NA, size_t &end_NA, size_t &split_ix); /* 'int x[]' overload with no 'split_categ' argument at all; takes a NewCategAction and 'move_new_to_left' */
812
+ void get_range(size_t ix_arr[], double x[], size_t st, size_t end,
813
+ MissingAction missing_action, double &xmin, double &xmax, bool &unsplittable); /* 'double x[]' overload; 'xmin', 'xmax' and 'unsplittable' are non-const references, presumably outputs -- confirm */
814
+ void get_range(size_t ix_arr[], size_t st, size_t end, size_t col_num,
815
+ double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
816
+ MissingAction missing_action, double &xmin, double &xmax, bool &unsplittable); /* overload taking the 'Xc' / 'Xc_ind' / 'Xc_indptr' triple plus a column number; same reference parameters as the 'double x[]' overload */
817
+ void get_categs(size_t ix_arr[], int x[], size_t st, size_t end, int ncat,
818
+ MissingAction missing_action, char categs[], size_t &npresent, bool &unsplittable); /* 'int x[]' counterpart; takes a 'char categs[]' buffer (presumably of length 'ncat' -- confirm) and the 'npresent' / 'unsplittable' references */
819
+ long double calculate_sum_weights(std::vector<size_t> &ix_arr, size_t st, size_t end, size_t curr_depth,
820
+ std::vector<double> &weights_arr, std::unordered_map<size_t, double> &weights_map); /* receives weights both as a vector and as a hash map; presumably only one of them is populated -- confirm. Returns long double */
821
+ void set_interrup_global_variable(int s); /* NOTE(review): 'interrup' looks like a typo for 'interrupt'; the name is part of the interface, so it is left as is. The void(int) shape matches a C signal handler -- presumably installed as one; confirm */
822
+ int return_EXIT_SUCCESS(); /* presumably returns the EXIT_SUCCESS constant -- confirm */
823
+ int return_EXIT_FAILURE(); /* presumably returns the EXIT_FAILURE constant -- confirm */
824
+
825
+
826
+
827
+ size_t move_NAs_to_front(size_t ix_arr[], size_t st, size_t end, double x[]); /* 'double x[]' overload */
828
+ size_t move_NAs_to_front(size_t ix_arr[], size_t st, size_t end, size_t col_num, double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[]); /* overload taking the 'Xc' / 'Xc_ind' / 'Xc_indptr' triple; index arrays are typed 'sparse_ix' to agree with every other prototype that takes this triple (e.g. todense below). NOTE(review): make sure the definition uses the same types */
829
+ size_t move_NAs_to_front(size_t ix_arr[], size_t st, size_t end, int x[]); /* 'int x[]' overload */
830
+ size_t center_NAs(size_t *restrict ix_arr, size_t st_left, size_t st, size_t curr_pos);
831
+ void todense(size_t ix_arr[], size_t st, size_t end,
832
+ size_t col_num, double *restrict Xc, sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
833
+ double *restrict buffer_arr); /* presumably writes the selected rows of column 'col_num' from the Xc triple into 'buffer_arr' -- confirm; 'Xc' and 'buffer_arr' are 'restrict' and must not overlap */
834
+
835
+ /* mult.cpp -- only the prototypes are visible in this header, the bodies are expected in mult.cpp */
836
+ void calc_mean_and_sd(size_t ix_arr[], size_t st, size_t end, double *restrict x,
837
+ MissingAction missing_action, double &x_sd, double &x_mean); /* 'double *x' overload; 'x_sd' and 'x_mean' are non-const references, presumably outputs -- confirm */
838
+ void calc_mean_and_sd(size_t ix_arr[], size_t st, size_t end, size_t col_num,
839
+ double *restrict Xc, sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
840
+ double &x_sd, double &x_mean); /* overload taking the 'Xc' / 'Xc_ind' / 'Xc_indptr' triple; note that, unlike the 'double *x' overload, it has no MissingAction parameter */
841
+ void add_linear_comb(size_t ix_arr[], size_t st, size_t end, double *restrict res,
842
+ double *restrict x, double &coef, double x_sd, double x_mean, double &fill_val,
843
+ MissingAction missing_action, double *restrict buffer_arr,
844
+ size_t *restrict buffer_NAs, bool first_run); /* 'double *x' overload; 'coef' and 'fill_val' are non-const references, so the callee may modify them -- confirm */
845
+ void add_linear_comb(size_t *restrict ix_arr, size_t st, size_t end, size_t col_num, double *restrict res,
846
+ double *restrict Xc, sparse_ix *restrict Xc_ind, sparse_ix *restrict Xc_indptr,
847
+ double &coef, double x_sd, double x_mean, double &fill_val, MissingAction missing_action,
848
+ double *restrict buffer_arr, size_t *restrict buffer_NAs, bool first_run); /* overload taking the 'Xc' / 'Xc_ind' / 'Xc_indptr' triple plus a column number; every pointer argument is 'restrict' */
849
+ void add_linear_comb(size_t *restrict ix_arr, size_t st, size_t end, double *restrict res,
850
+ int x[], int ncat, double *restrict cat_coef, double single_cat_coef, int chosen_cat,
851
+ double &fill_val, double &fill_new, size_t *restrict buffer_cnt, size_t *restrict buffer_pos,
852
+ NewCategAction new_cat_action, MissingAction missing_action, CategSplit cat_split_type, bool first_run); /* 'int x[]' overload; takes both a per-category 'cat_coef' array and a 'single_cat_coef' / 'chosen_cat' pair, presumably selected by 'cat_split_type' -- confirm */
853
+
854
+ /* crit.cpp -- only the prototypes are visible in this header, the bodies are expected in crit.cpp */
855
+ double calc_kurtosis(size_t ix_arr[], size_t st, size_t end, double x[], MissingAction missing_action); /* 'double x[]' overload */
856
+ double calc_kurtosis(size_t ix_arr[], size_t st, size_t end, size_t col_num,
857
+ double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
858
+ MissingAction missing_action); /* overload taking the 'Xc' / 'Xc_ind' / 'Xc_indptr' triple plus a column number */
859
+ double calc_kurtosis(size_t ix_arr[], size_t st, size_t end, int x[], int ncat, size_t buffer_cnt[], double buffer_prob[],
860
+ MissingAction missing_action, CategSplit cat_split_type, RNG_engine &rnd_generator); /* 'int x[]' overload; the only one that needs caller-supplied buffers and a random engine */
861
+ double expected_sd_cat(double p[], size_t n, size_t pos[]); /* overload without a 'counts' array */
862
+ double expected_sd_cat(size_t counts[], double p[], size_t n, size_t pos[]); /* overload with a leading 'counts' array */
863
+ double expected_sd_cat_single(size_t counts[], double p[], size_t n, size_t pos[], size_t cat_exclude, size_t cnt); /* same leading parameters as the 'counts' overload of expected_sd_cat, plus 'cat_exclude' and 'cnt' */
864
+ double numeric_gain(size_t cnt_left, size_t cnt_right,
865
+ long double sum_left, long double sum_right,
866
+ long double sum_sq_left, long double sum_sq_right,
867
+ double sd_full, long double cnt); /* takes per-side counts, sums and sums of squares plus 'sd_full' and a total 'cnt'; returns double */
868
+ double numeric_gain_no_div(size_t cnt_left, size_t cnt_right,
869
+ long double sum_left, long double sum_right,
870
+ long double sum_sq_left, long double sum_sq_right,
871
+ double sd_full, long double cnt); /* identical parameter list to numeric_gain; NOTE(review): the '_no_div' suffix presumably means a division step is skipped -- confirm in crit.cpp */
872
+ double categ_gain(size_t cnt_left, size_t cnt_right,
873
+ long double s_left, long double s_right,
874
+ long double base_info, long double cnt); /* takes per-side counts and 's_left' / 's_right' values plus 'base_info' instead of sums of squares */
875
+ double eval_guided_crit(double *restrict x, size_t n, GainCriterion criterion, double min_gain,
875
+ double &split_point, double &xmin, double &xmax); /* overload taking a plain 'x' array of length 'n' with no index array; 'split_point', 'xmin' and 'xmax' are non-const references, presumably outputs -- confirm */
876
+ double eval_guided_crit(size_t *restrict ix_arr, size_t st, size_t end, double *restrict x,
877
+ size_t &split_ix, double &split_point, double &xmin, double &xmax,
878
+ GainCriterion criterion, double min_gain, MissingAction missing_action); /* 'double *x' overload addressed through 'ix_arr' over 'st'..'end'; additionally takes 'split_ix' by reference and a MissingAction */
879
+ double eval_guided_crit(size_t ix_arr[], size_t st, size_t end,
880
+ size_t col_num, double Xc[], sparse_ix Xc_ind[], sparse_ix Xc_indptr[],
881
+ double buffer_arr[], size_t buffer_pos[],
882
+ double &split_point, double &xmin, double &xmax,
883
+ GainCriterion criterion, double min_gain, MissingAction missing_action); /* overload taking the 'Xc' / 'Xc_ind' / 'Xc_indptr' triple; needs caller-supplied 'buffer_arr' and 'buffer_pos' and has no 'split_ix' parameter */
884
+ double eval_guided_crit(size_t *restrict ix_arr, size_t st, size_t end, int *restrict x, int ncat,
885
+ size_t *restrict buffer_cnt, size_t *restrict buffer_pos, double *restrict buffer_prob,
886
+ int &chosen_cat, char *restrict split_categ, char *restrict buffer_split,
887
+ GainCriterion criterion, double min_gain, bool all_perm, MissingAction missing_action, CategSplit cat_split_type); /* 'int *x' overload; takes 'chosen_cat' by reference and a 'split_categ' buffer, presumably filling whichever one 'cat_split_type' calls for -- confirm */
889
+
890
+ /* merge_models.cpp -- only the prototype is visible in this header, the body is expected in merge_models.cpp */
891
+ void merge_models(IsoForest* model, IsoForest* other,
892
+ ExtIsoForest* ext_model, ExtIsoForest* ext_other,
893
+ Imputer* imputer, Imputer* iother); /* takes one (destination, other) pointer pair per object type: IsoForest, ExtIsoForest and Imputer. NOTE(review): presumably pairs that are not in use are passed as NULL and 'other' is merged into the first pointer of each pair -- confirm */
894
+
895
+ #ifdef _ENABLE_CEREAL
896
+ /* serialize.cpp -- declared only when _ENABLE_CEREAL is defined. Each object type (IsoForest, ExtIsoForest, Imputer) gets three serialize overloads (to a std::ostream, to a 'const char*' file path, returning a std::string) and three deserialize overloads (from a std::istream, from a 'const char*' file path, from a std::string) */
897
+ void serialize_isoforest(IsoForest &model, std::ostream &output);
898
+ void serialize_isoforest(IsoForest &model, const char *output_file_path);
899
+ std::string serialize_isoforest(IsoForest &model);
900
+ void deserialize_isoforest(IsoForest &output_obj, std::istream &serialized);
901
+ void deserialize_isoforest(IsoForest &output_obj, const char *input_file_path);
902
+ void deserialize_isoforest(IsoForest &output_obj, std::string &serialized, bool move_str); /* 'serialized' is a non-const reference; NOTE(review): 'move_str' presumably allows the callee to consume the string -- confirm */
903
+ void serialize_ext_isoforest(ExtIsoForest &model, std::ostream &output);
904
+ void serialize_ext_isoforest(ExtIsoForest &model, const char *output_file_path);
905
+ std::string serialize_ext_isoforest(ExtIsoForest &model);
906
+ void deserialize_ext_isoforest(ExtIsoForest &output_obj, std::istream &serialized);
907
+ void deserialize_ext_isoforest(ExtIsoForest &output_obj, const char *input_file_path);
908
+ void deserialize_ext_isoforest(ExtIsoForest &output_obj, std::string &serialized, bool move_str);
909
+ void serialize_imputer(Imputer &imputer, std::ostream &output);
910
+ void serialize_imputer(Imputer &imputer, const char *output_file_path);
911
+ std::string serialize_imputer(Imputer &imputer);
912
+ void deserialize_imputer(Imputer &output_obj, std::istream &serialized);
913
+ void deserialize_imputer(Imputer &output_obj, const char *input_file_path);
914
+ void deserialize_imputer(Imputer &output_obj, std::string &serialized, bool move_str);
915
+ #ifdef _MSC_VER /* wide-character ('const wchar_t*') file-path overloads, declared only when _MSC_VER is defined */
916
+ void serialize_isoforest(IsoForest &model, const wchar_t *output_file_path);
917
+ void deserialize_isoforest(IsoForest &output_obj, const wchar_t *input_file_path);
918
+ void serialize_ext_isoforest(ExtIsoForest &model, const wchar_t *output_file_path);
919
+ void deserialize_ext_isoforest(ExtIsoForest &output_obj, const wchar_t *input_file_path);
920
+ void serialize_imputer(Imputer &imputer, const wchar_t *output_file_path);
921
+ void deserialize_imputer(Imputer &output_obj, const wchar_t *input_file_path);
922
+ #endif /* _MSC_VER */
923
+ bool has_msvc(); /* declared whenever _ENABLE_CEREAL is defined, independently of _MSC_VER; presumably reports whether the wchar_t overloads were compiled in -- confirm */
924
+ #endif /* _ENABLE_CEREAL */
925
+
926
+ /* dealloc.cpp -- one routine per object type; presumably each releases the memory held by the object passed in -- confirm in dealloc.cpp */
927
+ void dealloc_IsoForest(IsoForest &model_outputs);
928
+ void dealloc_IsoExtForest(ExtIsoForest &model_outputs_ext); /* note the word order: the function says 'IsoExtForest' while the type it takes is 'ExtIsoForest' */
929
+ void dealloc_Imputer(Imputer &imputer);