isotree 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +2 -2
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +5 -5
- data/vendor/isotree/include/isotree.hpp +5 -0
- data/vendor/isotree/inst/COPYRIGHTS +70 -0
- data/vendor/isotree/src/RcppExports.cpp +130 -66
- data/vendor/isotree/src/Rwrapper.cpp +831 -346
- data/vendor/isotree/src/c_interface.cpp +6 -1
- data/vendor/isotree/src/crit.hpp +4 -0
- data/vendor/isotree/src/digamma.hpp +184 -0
- data/vendor/isotree/src/dist.hpp +2 -2
- data/vendor/isotree/src/fit_model.hpp +2 -2
- data/vendor/isotree/src/helpers_iforest.hpp +1 -0
- data/vendor/isotree/src/impute.hpp +40 -12
- data/vendor/isotree/src/isotree.hpp +19 -10
- data/vendor/isotree/src/mult.hpp +1 -1
- data/vendor/isotree/src/oop_interface.cpp +7 -5
- data/vendor/isotree/src/python_helpers.hpp +10 -30
- data/vendor/isotree/src/robinmap/README.md +1 -1
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +29 -10
- data/vendor/isotree/src/serialize.cpp +32 -16
- data/vendor/isotree/src/sql.cpp +2 -2
- data/vendor/isotree/src/utils.hpp +15 -37
- data/vendor/isotree/src/xoshiro.hpp +3 -7
- metadata +4 -3
@@ -798,13 +798,18 @@ void* isotree_deserialize_from_raw(const char *serialized_model, int nthreads)
|
|
798
798
|
Imputer imputer = Imputer();
|
799
799
|
TreesIndexer indexer = TreesIndexer();
|
800
800
|
|
801
|
+
std::unique_ptr<char[]> buffer_metadata;
|
802
|
+
if (size_metadata) {
|
803
|
+
buffer_metadata = std::unique_ptr<char[]>(new char[size_metadata]);
|
804
|
+
}
|
805
|
+
|
801
806
|
deserialize_combined(
|
802
807
|
serialized_model,
|
803
808
|
&model,
|
804
809
|
&model_ext,
|
805
810
|
&imputer,
|
806
811
|
&indexer,
|
807
|
-
(char*)nullptr
|
812
|
+
size_metadata? buffer_metadata.get() : (char*)nullptr
|
808
813
|
);
|
809
814
|
|
810
815
|
if (!model.trees.size() && !model_ext.hplanes.size())
|
data/vendor/isotree/src/crit.hpp
CHANGED
@@ -792,6 +792,7 @@ double calc_kurtosis_internal(size_t cnt, int x[], int ncat, size_t buffer_cnt[]
|
|
792
792
|
}
|
793
793
|
}
|
794
794
|
|
795
|
+
unreachable();
|
795
796
|
return -1; /* this will never be reached, but CRAN complains otherwise */
|
796
797
|
}
|
797
798
|
|
@@ -940,6 +941,7 @@ double calc_kurtosis_weighted_internal(std::vector<ldouble_safe> &buffer_cnt, in
|
|
940
941
|
}
|
941
942
|
}
|
942
943
|
|
944
|
+
unreachable();
|
943
945
|
return -1; /* this will never be reached, but CRAN complains otherwise */
|
944
946
|
}
|
945
947
|
|
@@ -2858,6 +2860,7 @@ double find_split_dens_longform(int *restrict x, int ncat, size_t *restrict ix_a
|
|
2858
2860
|
}
|
2859
2861
|
|
2860
2862
|
/* This will not be reached, but CRAN might complain otherwise */
|
2863
|
+
unreachable();
|
2861
2864
|
return -HUGE_VAL;
|
2862
2865
|
}
|
2863
2866
|
|
@@ -3052,6 +3055,7 @@ double find_split_dens_longform_weighted(int *restrict x, int ncat, size_t *rest
|
|
3052
3055
|
}
|
3053
3056
|
|
3054
3057
|
/* This will not be reached, but CRAN might complain otherwise */
|
3058
|
+
unreachable();
|
3055
3059
|
return -HUGE_VAL;
|
3056
3060
|
}
|
3057
3061
|
|
@@ -0,0 +1,184 @@
|
|
1
|
+
/* Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions
|
6
|
+
are met:
|
7
|
+
|
8
|
+
1. Redistributions of source code must retain the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer.
|
10
|
+
|
11
|
+
2. Redistributions in binary form must reproduce the above
|
12
|
+
copyright notice, this list of conditions and the following
|
13
|
+
disclaimer in the documentation and/or other materials provided
|
14
|
+
with the distribution.
|
15
|
+
|
16
|
+
3. Neither the name of the copyright holder nor the names of its
|
17
|
+
contributors may be used to endorse or promote products derived
|
18
|
+
from this software without specific prior written permission.
|
19
|
+
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
21
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
22
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
23
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
24
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
25
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
26
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
27
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
28
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
29
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
|
32
|
+
Distributed under 3-clause BSD license with permission from the author,
|
33
|
+
see https://lists.debian.org/debian-legal/2004/12/msg00295.html
|
34
|
+
|
35
|
+
------------------------------------------------------------------
|
36
|
+
|
37
|
+
Cephes Math Library Release 2.8: June, 2000
|
38
|
+
Copyright 1984, 1995, 2000 by Stephen L. Moshier
|
39
|
+
|
40
|
+
This software is derived from the Cephes Math Library and is
|
41
|
+
incorporated herein by permission of the author.
|
42
|
+
|
43
|
+
All rights reserved.
|
44
|
+
|
45
|
+
Redistribution and use in source and binary forms, with or without
|
46
|
+
modification, are permitted provided that the following conditions are met:
|
47
|
+
* Redistributions of source code must retain the above copyright
|
48
|
+
notice, this list of conditions and the following disclaimer.
|
49
|
+
* Redistributions in binary form must reproduce the above copyright
|
50
|
+
notice, this list of conditions and the following disclaimer in the
|
51
|
+
documentation and/or other materials provided with the distribution.
|
52
|
+
* Neither the name of the <organization> nor the
|
53
|
+
names of its contributors may be used to endorse or promote products
|
54
|
+
derived from this software without specific prior written permission.
|
55
|
+
|
56
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
57
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
58
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
59
|
+
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
60
|
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
61
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
62
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
63
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
64
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
65
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
66
|
+
|
67
|
+
static inline void poly6_twice_return_p7
|
68
|
+
(
|
69
|
+
const double x,
|
70
|
+
const double *restrict coefs1,
|
71
|
+
const double *restrict coefs2,
|
72
|
+
double &restrict res1,
|
73
|
+
double &restrict res2,
|
74
|
+
double &restrict w7
|
75
|
+
)
|
76
|
+
{
|
77
|
+
double w[6];
|
78
|
+
w[0] = 1.;
|
79
|
+
w[1] = x;
|
80
|
+
w[2] = x*x;
|
81
|
+
w[3] = x*w[2];
|
82
|
+
w[4] = w[2]*w[2];
|
83
|
+
w[5] = w[2]*w[3];
|
84
|
+
w7 = w[3]*w[3];
|
85
|
+
res1 = 0.;
|
86
|
+
res2 = 0.;
|
87
|
+
for (int ix = 0; ix < 6; ix++)
|
88
|
+
{
|
89
|
+
res1 = std::fma(coefs1[ix], w[ix], res1);
|
90
|
+
res2 = std::fma(coefs2[ix], w[ix], res2);
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
static inline double poly7(const double x, const double *restrict coefs)
|
95
|
+
{
|
96
|
+
double w[7];
|
97
|
+
w[0] = 1.;
|
98
|
+
w[1] = x;
|
99
|
+
w[2] = x*x;
|
100
|
+
w[3] = x*w[2];
|
101
|
+
w[4] = w[2]*w[2];
|
102
|
+
w[5] = w[2]*w[3];
|
103
|
+
w[6] = w[3]*w[3];
|
104
|
+
|
105
|
+
double out = 0.;
|
106
|
+
for (int ix = 0; ix < 7; ix++)
|
107
|
+
out = std::fma(w[ix], coefs[ix], out);
|
108
|
+
return out;
|
109
|
+
}
|
110
|
+
|
111
|
+
/* Six polynomial coefficients (lowest order first) that 'digamma' passes as the
   first coefficient set to 'poly6_twice_return_p7', evaluated at (x - 1); the
   resulting value is used there as the numerator of a ratio. */
static const double coefs_12_m[6] = {
     0.25479851061131551,    /* x^0 */
    -0.32555031186804491,    /* x^1 */
    -0.65031853770896507,    /* x^2 */
    -0.28919126444774784,    /* x^3 */
    -0.045251321448739056,   /* x^4 */
    -0.0020713321167745952   /* x^5 */
};
|
119
|
+
|
120
|
+
/* Six polynomial coefficients (lowest order first) that 'digamma' passes as the
   second coefficient set to 'poly6_twice_return_p7', evaluated at (x - 1); the
   resulting value is used there as part of the denominator of a ratio. */
static const double coefs_12_d[6] = {
    1.0,                     /* x^0 */
    2.0767117023730469,      /* x^1 */
    1.4606242909763515,      /* x^2 */
    0.43593529692665969,     /* x^3 */
    0.054151797245674225,    /* x^4 */
    0.0021284987017821144    /* x^5 */
};
|
128
|
+
|
129
|
+
/* Seven polynomial coefficients (lowest order first) that 'digamma' passes to
   'poly7' with argument z = 1/x^2, in the branch taken when the (shifted)
   argument lies outside [1, 2].

   Declared 'const' with an explicit size, like the other coefficient tables in
   this file: the values are never modified and 'poly7' reads exactly seven
   entries through a pointer-to-const. */
static const double coefs_asy[7] = {
    8.33333333333333333333E-2,
    -8.33333333333333333333E-3,
    3.96825396825396825397E-3,
    -4.16666666666666666667E-3,
    7.57575757575757575758E-3,
    -2.10927960927960927961E-2,
    8.33333333333333333333E-2
};
|
138
|
+
|
139
|
+
/* This is implemented only for positive non-integer inputs */
|
140
|
+
double digamma(double x)
|
141
|
+
{
|
142
|
+
/* check for positive integer up to 64 */
|
143
|
+
if (unlikely((x <= 64) && (x == std::floor(x)))) {
|
144
|
+
return harmonic_recursive(1.0, (double)x) - EULERS_GAMMA;
|
145
|
+
}
|
146
|
+
|
147
|
+
double y = 0.;
|
148
|
+
|
149
|
+
/* use the recurrence relation to move x into [1, 2] */
|
150
|
+
if (x < 1.) {
|
151
|
+
y -= 1. / x;
|
152
|
+
x += 1.;
|
153
|
+
}
|
154
|
+
else if (x < 10.) {
|
155
|
+
while (x > 2.) {
|
156
|
+
x -= 1.;
|
157
|
+
y += 1. / x;
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
if (x < 1. || x > 2.) {
|
162
|
+
double z = 1. / (x*x);
|
163
|
+
return y + std::log(x) - 0.5/x - z*poly7(z, coefs_asy);
|
164
|
+
}
|
165
|
+
|
166
|
+
const double r1 = 1.46163214463740587234;
|
167
|
+
const double r2 = 0.00000000033095646883;
|
168
|
+
const double r3 = 0.9016312093258695918615325266959189453125e-19;
|
169
|
+
const double Y = 0.99558162689208984;
|
170
|
+
double m, d, p7;
|
171
|
+
poly6_twice_return_p7(
|
172
|
+
x - 1.,
|
173
|
+
coefs_12_m,
|
174
|
+
coefs_12_d,
|
175
|
+
m,
|
176
|
+
d,
|
177
|
+
p7
|
178
|
+
);
|
179
|
+
double r = m / std::fma(p7, -0.55789841321675513e-6, d);
|
180
|
+
double g = x - r1;
|
181
|
+
g -= r2;
|
182
|
+
g -= r3;
|
183
|
+
return y + g*Y + g*r;
|
184
|
+
}
|
data/vendor/isotree/src/dist.hpp
CHANGED
@@ -234,7 +234,7 @@ void calc_similarity(real_t numeric_data[], int categ_data[],
|
|
234
234
|
{
|
235
235
|
if (use_long_double && !has_long_double()) {
|
236
236
|
use_long_double = false;
|
237
|
-
|
237
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
238
238
|
}
|
239
239
|
#ifndef NO_LONG_DOUBLE
|
240
240
|
if (likely(!use_long_double))
|
@@ -297,7 +297,7 @@ void calc_similarity_internal(
|
|
297
297
|
throw std::runtime_error("Indexer was built without distances. Cannot use references from it.\n");
|
298
298
|
else {
|
299
299
|
indexer = NULL;
|
300
|
-
|
300
|
+
print_errmsg("Indexer has no pre-computed distances, will not be used for distance calculations.\n");
|
301
301
|
}
|
302
302
|
}
|
303
303
|
if (
|
@@ -598,7 +598,7 @@ int fit_iforest(IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
|
|
598
598
|
{
|
599
599
|
if (use_long_double && !has_long_double()) {
|
600
600
|
use_long_double = false;
|
601
|
-
|
601
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
602
602
|
}
|
603
603
|
#ifndef NO_LONG_DOUBLE
|
604
604
|
if (likely(!use_long_double))
|
@@ -1400,7 +1400,7 @@ int add_tree(IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
|
|
1400
1400
|
{
|
1401
1401
|
if (use_long_double && !has_long_double()) {
|
1402
1402
|
use_long_double = false;
|
1403
|
-
|
1403
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
1404
1404
|
}
|
1405
1405
|
#ifndef NO_LONG_DOUBLE
|
1406
1406
|
if (likely(!use_long_double))
|
@@ -145,7 +145,7 @@ void impute_missing_values(real_t numeric_data[], int categ_data[], bool is_col_
|
|
145
145
|
{
|
146
146
|
if (use_long_double && !has_long_double()) {
|
147
147
|
use_long_double = false;
|
148
|
-
|
148
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
149
149
|
}
|
150
150
|
#ifndef NO_LONG_DOUBLE
|
151
151
|
if (likely(!use_long_double))
|
@@ -313,6 +313,7 @@ void initialize_imputer(Imputer &imputer, InputData &input_data, size_t ntrees,
|
|
313
313
|
imputer.col_modes.resize(input_data.ncols_categ);
|
314
314
|
imputer.imputer_tree = std::vector<std::vector<ImputeNode>>(ntrees);
|
315
315
|
|
316
|
+
/* TODO: here should use sample weights if specified as density */
|
316
317
|
size_t offset, cnt;
|
317
318
|
if (input_data.numeric_data != NULL)
|
318
319
|
{
|
@@ -328,6 +329,7 @@ void initialize_imputer(Imputer &imputer, InputData &input_data, size_t ntrees,
|
|
328
329
|
cnt -= is_na_or_inf(input_data.numeric_data[row + offset]);
|
329
330
|
}
|
330
331
|
imputer.col_means[col] /= (ldouble_safe) cnt;
|
332
|
+
if (!cnt) imputer.col_means[col] = NAN;
|
331
333
|
}
|
332
334
|
}
|
333
335
|
|
@@ -344,6 +346,7 @@ void initialize_imputer(Imputer &imputer, InputData &input_data, size_t ntrees,
|
|
344
346
|
cnt -= is_na_or_inf(input_data.Xc[ix]);
|
345
347
|
}
|
346
348
|
imputer.col_means[col] /= (ldouble_safe) cnt;
|
349
|
+
if (!cnt) imputer.col_means[col] = NAN;
|
347
350
|
}
|
348
351
|
}
|
349
352
|
|
@@ -624,7 +627,7 @@ void build_impute_node(ImputeNode &imputer, WorkerMemory &workspace,
|
|
624
627
|
imputer.cat_sum[col][cat]
|
625
628
|
+=
|
626
629
|
(imputer_tree[curr_tree].cat_sum[col][cat] > 0)?
|
627
|
-
(imputer_tree[curr_tree].cat_sum[col][cat] /
|
630
|
+
(imputer_tree[curr_tree].cat_sum[col][cat] / imputer_tree[curr_tree].cat_weight[col]) : 0.;
|
628
631
|
imputer.cat_weight[col] = wsum / (double)(2 * look_aboves);
|
629
632
|
}
|
630
633
|
break;
|
@@ -999,6 +1002,11 @@ void apply_imputation_results(std::vector<ImputedData> &impute_vec,
|
|
999
1002
|
}
|
1000
1003
|
|
1001
1004
|
|
1005
|
+
/* TODO: investigate why in the case of all-missing numeric columns the node weights still
|
1006
|
+
get filled when using extended model, then remove the workaround that was added here that
|
1007
|
+
checks if the sum is zero and column is all-nan. Should also modify the earlier code to
|
1008
|
+
remove these cases from the imputation tracking list when doing the imputations on-the-fly
|
1009
|
+
as the model is being fit. */
|
1002
1010
|
template <class PredictionData, class ImputedData>
|
1003
1011
|
void apply_imputation_results(PredictionData &prediction_data,
|
1004
1012
|
ImputedData &imp,
|
@@ -1012,7 +1020,7 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1012
1020
|
for (size_t ix = 0; ix < imp.n_missing_num; ix++)
|
1013
1021
|
{
|
1014
1022
|
col = imp.missing_num[ix];
|
1015
|
-
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]))
|
1023
|
+
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]) && !(imp.num_sum[ix] == 0 && std::isnan(imputer.col_means[col])))
|
1016
1024
|
prediction_data.numeric_data[row + col * prediction_data.nrows]
|
1017
1025
|
=
|
1018
1026
|
imp.num_sum[ix] / imp.num_weight[ix];
|
@@ -1028,7 +1036,7 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1028
1036
|
for (size_t ix = 0; ix < imp.n_missing_num; ix++)
|
1029
1037
|
{
|
1030
1038
|
col = imp.missing_num[ix];
|
1031
|
-
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]))
|
1039
|
+
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]) && !(imp.num_sum[ix] == 0 && std::isnan(imputer.col_means[col])))
|
1032
1040
|
prediction_data.numeric_data[col + row * imputer.ncols_numeric]
|
1033
1041
|
=
|
1034
1042
|
imp.num_sum[ix] / imp.num_weight[ix];
|
@@ -1066,10 +1074,20 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1066
1074
|
std::distance(imp.cat_sum[col].begin(),
|
1067
1075
|
std::max_element(imp.cat_sum[col].begin(), imp.cat_sum[col].end()));
|
1068
1076
|
|
1069
|
-
if (prediction_data.categ_data[row + col * prediction_data.nrows] == 0
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1077
|
+
if (prediction_data.categ_data[row + col * prediction_data.nrows] == 0)
|
1078
|
+
{
|
1079
|
+
if (imp.cat_sum.empty() || imp.cat_sum[col].empty())
|
1080
|
+
{
|
1081
|
+
prediction_data.categ_data[row + col * prediction_data.nrows] = -1;
|
1082
|
+
}
|
1083
|
+
|
1084
|
+
else if (imp.cat_sum[col][0] <= 0)
|
1085
|
+
{
|
1086
|
+
prediction_data.categ_data[row + col * prediction_data.nrows]
|
1087
|
+
=
|
1088
|
+
imputer.col_modes[col];
|
1089
|
+
}
|
1090
|
+
}
|
1073
1091
|
}
|
1074
1092
|
}
|
1075
1093
|
|
@@ -1083,10 +1101,20 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1083
1101
|
std::distance(imp.cat_sum[col].begin(),
|
1084
1102
|
std::max_element(imp.cat_sum[col].begin(), imp.cat_sum[col].end()));
|
1085
1103
|
|
1086
|
-
if (prediction_data.categ_data[col + row * imputer.ncols_categ] == 0
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1104
|
+
if (prediction_data.categ_data[col + row * imputer.ncols_categ] == 0)
|
1105
|
+
{
|
1106
|
+
if (imp.cat_sum.empty() || imp.cat_sum[col].empty())
|
1107
|
+
{
|
1108
|
+
prediction_data.categ_data[col + row * imputer.ncols_categ] = -1;
|
1109
|
+
}
|
1110
|
+
|
1111
|
+
else if (imp.cat_sum[col][0] <= 0)
|
1112
|
+
{
|
1113
|
+
prediction_data.categ_data[col + row * imputer.ncols_categ]
|
1114
|
+
=
|
1115
|
+
imputer.col_modes[col];
|
1116
|
+
}
|
1117
|
+
}
|
1090
1118
|
}
|
1091
1119
|
}
|
1092
1120
|
}
|
@@ -109,15 +109,13 @@
|
|
109
109
|
extern "C" {
|
110
110
|
#include <R_ext/Print.h>
|
111
111
|
}
|
112
|
-
#define
|
113
|
-
#define fprintf(f, message) REprintf(message)
|
112
|
+
#define print_errmsg(msg) REprintf("%s", msg)
|
114
113
|
#elif defined(_FOR_PYTHON)
|
115
|
-
extern "C"
|
116
|
-
#define
|
114
|
+
extern "C" int cy_warning(const char *msg);
|
115
|
+
#define print_errmsg cy_warning
|
117
116
|
#else
|
118
117
|
#include <cstdio>
|
119
|
-
|
120
|
-
using std::fprintf;
|
118
|
+
#define print_errmsg(msg) std::fprintf(stderr, "%s", msg)
|
121
119
|
#endif
|
122
120
|
#ifdef _OPENMP
|
123
121
|
#include <omp.h>
|
@@ -285,6 +283,19 @@ using std::memcpy;
|
|
285
283
|
#endif
|
286
284
|
#endif
|
287
285
|
|
286
|
+
/* 'unreachable()' marks a point that control flow can never reach, so that the
   compiler may optimize accordingly. It is placed right before dummy 'return'
   statements that exist only to silence missing-return diagnostics.

   Resolution order:
     - an existing 'unreachable' macro is left untouched;
     - 'std::unreachable' when the standard library advertises it through the
       '__cpp_lib_unreachable' feature-test macro (<utility> is pulled in when
       compiling as C++23, whose '__cplusplus' value is 202302L);
     - the MSVC or GCC/Clang intrinsic;
     - otherwise a no-op. */
#ifndef unreachable
#   if (__cplusplus >= 202302L)
#       include <utility>
#   endif
#   if defined(__cpp_lib_unreachable)
using std::unreachable;
#   elif defined(_MSC_VER)
#       define unreachable() (__assume(false))
#   elif defined(__GNUC__) || defined(__clang__)
#       define unreachable() __builtin_unreachable()
#   else
#       define unreachable()
#   endif
#endif
|
298
|
+
|
288
299
|
|
289
300
|
/* Apple at some point decided to drop OMP library and headers from its compiler distribution
|
290
301
|
* and to alias 'gcc' to 'clang', which work differently when given flags they cannot interpret,
|
@@ -1936,8 +1947,7 @@ public:
|
|
1936
1947
|
{
|
1937
1948
|
if (this->handle) {
|
1938
1949
|
int err = std::fclose(this->handle);
|
1939
|
-
if (err)
|
1940
|
-
fprintf(stderr, "Error: could not close file.\n");
|
1950
|
+
if (err) print_errmsg("Error: could not close file.\n");
|
1941
1951
|
}
|
1942
1952
|
this->handle = NULL;
|
1943
1953
|
}
|
@@ -1960,8 +1970,7 @@ public:
|
|
1960
1970
|
{
|
1961
1971
|
if (this->handle) {
|
1962
1972
|
int err = std::fclose(this->handle);
|
1963
|
-
if (err)
|
1964
|
-
fprintf(stderr, "Error: could not close file.\n");
|
1973
|
+
if (err) print_errmsg("Error: could not close file.\n");
|
1965
1974
|
}
|
1966
1975
|
this->handle = NULL;
|
1967
1976
|
}
|
data/vendor/isotree/src/mult.hpp
CHANGED
@@ -598,7 +598,7 @@ void add_linear_comb_weighted(size_t ix_arr[], size_t st, size_t end, double *re
|
|
598
598
|
w_this = w[ix_arr[row]];
|
599
599
|
res_write[row] = std::fma(x[ix_arr[row]] - x_mean, coef, res_write[row]);
|
600
600
|
obs_weight[cnt] = w_this;
|
601
|
-
buffer_arr[cnt++] =
|
601
|
+
buffer_arr[cnt++] = x[ix_arr[row]];
|
602
602
|
cumw += w_this;
|
603
603
|
}
|
604
604
|
|
@@ -628,14 +628,12 @@ void IsolationForest::check_nthreads()
|
|
628
628
|
#endif
|
629
629
|
}
|
630
630
|
if (nthreads <= 0) {
|
631
|
-
|
631
|
+
print_errmsg("'isotree' got invalid 'nthreads', will set to 1.\n");
|
632
632
|
this->nthreads = 1;
|
633
633
|
}
|
634
634
|
#ifndef _OPENMP
|
635
635
|
else if (nthreads > 1) {
|
636
|
-
|
637
|
-
"Passed nthreads:%d to 'isotree', but library was compiled without multithreading.\n",
|
638
|
-
this->nthreads);
|
636
|
+
print_errmsg("Passed nthreads>1 to 'isotree', but library was compiled without multithreading.\n");
|
639
637
|
this->nthreads = 1;
|
640
638
|
}
|
641
639
|
#endif
|
@@ -787,6 +785,10 @@ IsolationForest IsolationForest::deserialize_template(itype &inp, int nthreads)
|
|
787
785
|
ExtIsoForest model_ext = ExtIsoForest();
|
788
786
|
Imputer imputer = Imputer();
|
789
787
|
TreesIndexer indexer = TreesIndexer();
|
788
|
+
std::unique_ptr<char[]> buffer_metadata;
|
789
|
+
if (size_metadata) {
|
790
|
+
buffer_metadata = std::unique_ptr<char[]>(new char[size_metadata]);
|
791
|
+
}
|
790
792
|
|
791
793
|
deserialize_combined(
|
792
794
|
inp,
|
@@ -794,7 +796,7 @@ IsolationForest IsolationForest::deserialize_template(itype &inp, int nthreads)
|
|
794
796
|
&model_ext,
|
795
797
|
&imputer,
|
796
798
|
&indexer,
|
797
|
-
(char*)nullptr
|
799
|
+
size_metadata? buffer_metadata.get() : (char*)nullptr
|
798
800
|
);
|
799
801
|
|
800
802
|
if (model.trees.empty() && model_ext.hplanes.empty())
|
@@ -92,36 +92,6 @@ TreesIndexer get_Indexer()
|
|
92
92
|
return TreesIndexer();
|
93
93
|
}
|
94
94
|
|
95
|
-
/* Reason behind these functions: Cython (as of v0.29) will not auto-deallocate
|
96
|
-
structs which are part of a cdef'd class, which produces a memory leak
|
97
|
-
but can be force-destructed. Unfortunately, Cython itself doesn't even
|
98
|
-
allow calling destructors for structs, so it has to be done externally.
|
99
|
-
These functions should otherwise have no reason to be.
|
100
|
-
|
101
|
-
This is supposed to be already fixed in newer Cython versions:
|
102
|
-
https://github.com/cython/cython/issues/3226
|
103
|
-
But is not yet available in the relase versions at the time of writing */
|
104
|
-
|
105
|
-
void dealloc_IsoForest(IsoForest &model_outputs)
|
106
|
-
{
|
107
|
-
model_outputs.~IsoForest();
|
108
|
-
}
|
109
|
-
|
110
|
-
void dealloc_IsoExtForest(ExtIsoForest &model_outputs_ext)
|
111
|
-
{
|
112
|
-
model_outputs_ext.~ExtIsoForest();
|
113
|
-
}
|
114
|
-
|
115
|
-
void dealloc_Imputer(Imputer &imputer)
|
116
|
-
{
|
117
|
-
imputer.~Imputer();
|
118
|
-
}
|
119
|
-
|
120
|
-
void dealloc_Indexer(TreesIndexer &indexer)
|
121
|
-
{
|
122
|
-
indexer.~TreesIndexer();
|
123
|
-
}
|
124
|
-
|
125
95
|
bool get_has_openmp(void)
|
126
96
|
{
|
127
97
|
#ifdef _OPENMP
|
@@ -131,4 +101,14 @@ bool get_has_openmp(void)
|
|
131
101
|
#endif
|
132
102
|
}
|
133
103
|
|
104
|
+
/* Returns the length (excluding the terminating null) of the message that
   'strerror' produces for the current value of 'errno'. */
size_t py_strerrorlen_s()
{
    const char *message = strerror(errno);
    return strlen(message);
}
|
108
|
+
|
109
|
+
/* Copies the 'strerror' message for the current value of 'errno', including
   its terminating null, into the buffer pointed to by 'inp'.

   No bounds checking is performed: the caller must supply a buffer large
   enough for the whole message plus the terminator. */
void copy_errno_msg(char *inp)
{
    const char *message = strerror(errno);
    strcpy(inp, message);
}
|
113
|
+
|
134
114
|
#endif
|
@@ -90,7 +90,7 @@ target_link_libraries(your_target PRIVATE tsl::robin_map)
|
|
90
90
|
|
91
91
|
If the project has been installed through `make install`, you can also use `find_package(tsl-robin-map REQUIRED)` instead of `add_subdirectory`.
|
92
92
|
|
93
|
-
The library is available in [vcpkg](https://github.com/Microsoft/vcpkg/tree/master/ports/robin-map) and [conan](https://
|
93
|
+
The library is available in [vcpkg](https://github.com/Microsoft/vcpkg/tree/master/ports/robin-map) and [conan](https://conan.io/center/tsl-robin-map). It's also present in [Debian](https://packages.debian.org/buster/robin-map-dev), [Ubuntu](https://packages.ubuntu.com/disco/robin-map-dev) and [Fedora](https://apps.fedoraproject.org/packages/robin-map-devel) package repositories.
|
94
94
|
|
95
95
|
The code should work with any C++11 standard-compliant compiler and has been tested with GCC 4.8.4, Clang 3.5.0 and Visual Studio 2015.
|
96
96
|
|