isotree 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +2 -2
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +5 -5
- data/vendor/isotree/include/isotree.hpp +5 -0
- data/vendor/isotree/inst/COPYRIGHTS +70 -0
- data/vendor/isotree/src/RcppExports.cpp +130 -66
- data/vendor/isotree/src/Rwrapper.cpp +831 -346
- data/vendor/isotree/src/c_interface.cpp +6 -1
- data/vendor/isotree/src/crit.hpp +4 -0
- data/vendor/isotree/src/digamma.hpp +184 -0
- data/vendor/isotree/src/dist.hpp +2 -2
- data/vendor/isotree/src/fit_model.hpp +2 -2
- data/vendor/isotree/src/helpers_iforest.hpp +1 -0
- data/vendor/isotree/src/impute.hpp +40 -12
- data/vendor/isotree/src/isotree.hpp +19 -10
- data/vendor/isotree/src/mult.hpp +1 -1
- data/vendor/isotree/src/oop_interface.cpp +7 -5
- data/vendor/isotree/src/python_helpers.hpp +10 -30
- data/vendor/isotree/src/robinmap/README.md +1 -1
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +29 -10
- data/vendor/isotree/src/serialize.cpp +32 -16
- data/vendor/isotree/src/sql.cpp +2 -2
- data/vendor/isotree/src/utils.hpp +15 -37
- data/vendor/isotree/src/xoshiro.hpp +3 -7
- metadata +4 -3
@@ -798,13 +798,18 @@ void* isotree_deserialize_from_raw(const char *serialized_model, int nthreads)
|
|
798
798
|
Imputer imputer = Imputer();
|
799
799
|
TreesIndexer indexer = TreesIndexer();
|
800
800
|
|
801
|
+
std::unique_ptr<char[]> buffer_metadata;
|
802
|
+
if (size_metadata) {
|
803
|
+
buffer_metadata = std::unique_ptr<char[]>(new char[size_metadata]);
|
804
|
+
}
|
805
|
+
|
801
806
|
deserialize_combined(
|
802
807
|
serialized_model,
|
803
808
|
&model,
|
804
809
|
&model_ext,
|
805
810
|
&imputer,
|
806
811
|
&indexer,
|
807
|
-
(char*)nullptr
|
812
|
+
size_metadata? buffer_metadata.get() : (char*)nullptr
|
808
813
|
);
|
809
814
|
|
810
815
|
if (!model.trees.size() && !model_ext.hplanes.size())
|
data/vendor/isotree/src/crit.hpp
CHANGED
@@ -792,6 +792,7 @@ double calc_kurtosis_internal(size_t cnt, int x[], int ncat, size_t buffer_cnt[]
|
|
792
792
|
}
|
793
793
|
}
|
794
794
|
|
795
|
+
unreachable();
|
795
796
|
return -1; /* this will never be reached, but CRAN complains otherwise */
|
796
797
|
}
|
797
798
|
|
@@ -940,6 +941,7 @@ double calc_kurtosis_weighted_internal(std::vector<ldouble_safe> &buffer_cnt, in
|
|
940
941
|
}
|
941
942
|
}
|
942
943
|
|
944
|
+
unreachable();
|
943
945
|
return -1; /* this will never be reached, but CRAN complains otherwise */
|
944
946
|
}
|
945
947
|
|
@@ -2858,6 +2860,7 @@ double find_split_dens_longform(int *restrict x, int ncat, size_t *restrict ix_a
|
|
2858
2860
|
}
|
2859
2861
|
|
2860
2862
|
/* This will not be reached, but CRAN might complain otherwise */
|
2863
|
+
unreachable();
|
2861
2864
|
return -HUGE_VAL;
|
2862
2865
|
}
|
2863
2866
|
|
@@ -3052,6 +3055,7 @@ double find_split_dens_longform_weighted(int *restrict x, int ncat, size_t *rest
|
|
3052
3055
|
}
|
3053
3056
|
|
3054
3057
|
/* This will not be reached, but CRAN might complain otherwise */
|
3058
|
+
unreachable();
|
3055
3059
|
return -HUGE_VAL;
|
3056
3060
|
}
|
3057
3061
|
|
@@ -0,0 +1,184 @@
|
|
1
|
+
/* Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions
|
6
|
+
are met:
|
7
|
+
|
8
|
+
1. Redistributions of source code must retain the above copyright
|
9
|
+
notice, this list of conditions and the following disclaimer.
|
10
|
+
|
11
|
+
2. Redistributions in binary form must reproduce the above
|
12
|
+
copyright notice, this list of conditions and the following
|
13
|
+
disclaimer in the documentation and/or other materials provided
|
14
|
+
with the distribution.
|
15
|
+
|
16
|
+
3. Neither the name of the copyright holder nor the names of its
|
17
|
+
contributors may be used to endorse or promote products derived
|
18
|
+
from this software without specific prior written permission.
|
19
|
+
|
20
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
21
|
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
22
|
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
23
|
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
24
|
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
25
|
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
26
|
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
27
|
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
28
|
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
29
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
30
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
31
|
+
|
32
|
+
Distributed under 3-clause BSD license with permission from the author,
|
33
|
+
see https://lists.debian.org/debian-legal/2004/12/msg00295.html
|
34
|
+
|
35
|
+
------------------------------------------------------------------
|
36
|
+
|
37
|
+
Cephes Math Library Release 2.8: June, 2000
|
38
|
+
Copyright 1984, 1995, 2000 by Stephen L. Moshier
|
39
|
+
|
40
|
+
This software is derived from the Cephes Math Library and is
|
41
|
+
incorporated herein by permission of the author.
|
42
|
+
|
43
|
+
All rights reserved.
|
44
|
+
|
45
|
+
Redistribution and use in source and binary forms, with or without
|
46
|
+
modification, are permitted provided that the following conditions are met:
|
47
|
+
* Redistributions of source code must retain the above copyright
|
48
|
+
notice, this list of conditions and the following disclaimer.
|
49
|
+
* Redistributions in binary form must reproduce the above copyright
|
50
|
+
notice, this list of conditions and the following disclaimer in the
|
51
|
+
documentation and/or other materials provided with the distribution.
|
52
|
+
* Neither the name of the <organization> nor the
|
53
|
+
names of its contributors may be used to endorse or promote products
|
54
|
+
derived from this software without specific prior written permission.
|
55
|
+
|
56
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
57
|
+
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
58
|
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
59
|
+
DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
|
60
|
+
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
61
|
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
62
|
+
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
63
|
+
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
64
|
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
65
|
+
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */
|
66
|
+
|
67
|
+
static inline void poly6_twice_return_p7
|
68
|
+
(
|
69
|
+
const double x,
|
70
|
+
const double *restrict coefs1,
|
71
|
+
const double *restrict coefs2,
|
72
|
+
double &restrict res1,
|
73
|
+
double &restrict res2,
|
74
|
+
double &restrict w7
|
75
|
+
)
|
76
|
+
{
|
77
|
+
double w[6];
|
78
|
+
w[0] = 1.;
|
79
|
+
w[1] = x;
|
80
|
+
w[2] = x*x;
|
81
|
+
w[3] = x*w[2];
|
82
|
+
w[4] = w[2]*w[2];
|
83
|
+
w[5] = w[2]*w[3];
|
84
|
+
w7 = w[3]*w[3];
|
85
|
+
res1 = 0.;
|
86
|
+
res2 = 0.;
|
87
|
+
for (int ix = 0; ix < 6; ix++)
|
88
|
+
{
|
89
|
+
res1 = std::fma(coefs1[ix], w[ix], res1);
|
90
|
+
res2 = std::fma(coefs2[ix], w[ix], res2);
|
91
|
+
}
|
92
|
+
}
|
93
|
+
|
94
|
+
static inline double poly7(const double x, const double *restrict coefs)
|
95
|
+
{
|
96
|
+
double w[7];
|
97
|
+
w[0] = 1.;
|
98
|
+
w[1] = x;
|
99
|
+
w[2] = x*x;
|
100
|
+
w[3] = x*w[2];
|
101
|
+
w[4] = w[2]*w[2];
|
102
|
+
w[5] = w[2]*w[3];
|
103
|
+
w[6] = w[3]*w[3];
|
104
|
+
|
105
|
+
double out = 0.;
|
106
|
+
for (int ix = 0; ix < 7; ix++)
|
107
|
+
out = std::fma(w[ix], coefs[ix], out);
|
108
|
+
return out;
|
109
|
+
}
|
110
|
+
|
111
|
+
/* Numerator coefficients of the rational approximation that 'digamma' uses
   once its argument has been moved into [1, 2]. They are ordered from the
   constant term upwards and evaluated at (x - 1). */
static const double coefs_12_m[6] = {
    0.25479851061131551,
    -0.32555031186804491,
    -0.65031853770896507,
    -0.28919126444774784,
    -0.045251321448739056,
    -0.0020713321167745952
};

/* Denominator coefficients of the same rational approximation, in the same
   order. 'digamma' adds one more term in the sixth power on top of these. */
static const double coefs_12_d[6] = {
    1.0,
    2.0767117023730469,
    1.4606242909763515,
    0.43593529692665969,
    0.054151797245674225,
    0.0021284987017821144
};

/* Coefficients of the series in powers of 1/x^2 that 'digamma' uses for large
   arguments, ordered from the lowest power upwards. As fractions they are
   1/12, -1/120, 1/252, -1/240, 1/132, -691/32760 and 1/12, i.e. B(2k)/(2k)
   for k = 1..7 with B the Bernoulli numbers.

   The table is read-only and has exactly the seven entries that 'poly7'
   consumes, so it is declared 'const' with an explicit size, matching the two
   tables above. */
static const double coefs_asy[7] = {
    8.33333333333333333333E-2,
    -8.33333333333333333333E-3,
    3.96825396825396825397E-3,
    -4.16666666666666666667E-3,
    7.57575757575757575758E-3,
    -2.10927960927960927961E-2,
    8.33333333333333333333E-2
};
|
138
|
+
|
139
|
+
/* This is implemented only for positive non-integer inputs */
|
140
|
+
double digamma(double x)
|
141
|
+
{
|
142
|
+
/* check for positive integer up to 64 */
|
143
|
+
if (unlikely((x <= 64) && (x == std::floor(x)))) {
|
144
|
+
return harmonic_recursive(1.0, (double)x) - EULERS_GAMMA;
|
145
|
+
}
|
146
|
+
|
147
|
+
double y = 0.;
|
148
|
+
|
149
|
+
/* use the recurrence relation to move x into [1, 2] */
|
150
|
+
if (x < 1.) {
|
151
|
+
y -= 1. / x;
|
152
|
+
x += 1.;
|
153
|
+
}
|
154
|
+
else if (x < 10.) {
|
155
|
+
while (x > 2.) {
|
156
|
+
x -= 1.;
|
157
|
+
y += 1. / x;
|
158
|
+
}
|
159
|
+
}
|
160
|
+
|
161
|
+
if (x < 1. || x > 2.) {
|
162
|
+
double z = 1. / (x*x);
|
163
|
+
return y + std::log(x) - 0.5/x - z*poly7(z, coefs_asy);
|
164
|
+
}
|
165
|
+
|
166
|
+
const double r1 = 1.46163214463740587234;
|
167
|
+
const double r2 = 0.00000000033095646883;
|
168
|
+
const double r3 = 0.9016312093258695918615325266959189453125e-19;
|
169
|
+
const double Y = 0.99558162689208984;
|
170
|
+
double m, d, p7;
|
171
|
+
poly6_twice_return_p7(
|
172
|
+
x - 1.,
|
173
|
+
coefs_12_m,
|
174
|
+
coefs_12_d,
|
175
|
+
m,
|
176
|
+
d,
|
177
|
+
p7
|
178
|
+
);
|
179
|
+
double r = m / std::fma(p7, -0.55789841321675513e-6, d);
|
180
|
+
double g = x - r1;
|
181
|
+
g -= r2;
|
182
|
+
g -= r3;
|
183
|
+
return y + g*Y + g*r;
|
184
|
+
}
|
data/vendor/isotree/src/dist.hpp
CHANGED
@@ -234,7 +234,7 @@ void calc_similarity(real_t numeric_data[], int categ_data[],
|
|
234
234
|
{
|
235
235
|
if (use_long_double && !has_long_double()) {
|
236
236
|
use_long_double = false;
|
237
|
-
|
237
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
238
238
|
}
|
239
239
|
#ifndef NO_LONG_DOUBLE
|
240
240
|
if (likely(!use_long_double))
|
@@ -297,7 +297,7 @@ void calc_similarity_internal(
|
|
297
297
|
throw std::runtime_error("Indexer was built without distances. Cannot use references from it.\n");
|
298
298
|
else {
|
299
299
|
indexer = NULL;
|
300
|
-
|
300
|
+
print_errmsg("Indexer has no pre-computed distances, will not be used for distance calculations.\n");
|
301
301
|
}
|
302
302
|
}
|
303
303
|
if (
|
@@ -598,7 +598,7 @@ int fit_iforest(IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
|
|
598
598
|
{
|
599
599
|
if (use_long_double && !has_long_double()) {
|
600
600
|
use_long_double = false;
|
601
|
-
|
601
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
602
602
|
}
|
603
603
|
#ifndef NO_LONG_DOUBLE
|
604
604
|
if (likely(!use_long_double))
|
@@ -1400,7 +1400,7 @@ int add_tree(IsoForest *model_outputs, ExtIsoForest *model_outputs_ext,
|
|
1400
1400
|
{
|
1401
1401
|
if (use_long_double && !has_long_double()) {
|
1402
1402
|
use_long_double = false;
|
1403
|
-
|
1403
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
1404
1404
|
}
|
1405
1405
|
#ifndef NO_LONG_DOUBLE
|
1406
1406
|
if (likely(!use_long_double))
|
@@ -145,7 +145,7 @@ void impute_missing_values(real_t numeric_data[], int categ_data[], bool is_col_
|
|
145
145
|
{
|
146
146
|
if (use_long_double && !has_long_double()) {
|
147
147
|
use_long_double = false;
|
148
|
-
|
148
|
+
print_errmsg("Passed 'use_long_double=true', but library was compiled without long double support.\n");
|
149
149
|
}
|
150
150
|
#ifndef NO_LONG_DOUBLE
|
151
151
|
if (likely(!use_long_double))
|
@@ -313,6 +313,7 @@ void initialize_imputer(Imputer &imputer, InputData &input_data, size_t ntrees,
|
|
313
313
|
imputer.col_modes.resize(input_data.ncols_categ);
|
314
314
|
imputer.imputer_tree = std::vector<std::vector<ImputeNode>>(ntrees);
|
315
315
|
|
316
|
+
/* TODO: here should use sample weights if specified as density */
|
316
317
|
size_t offset, cnt;
|
317
318
|
if (input_data.numeric_data != NULL)
|
318
319
|
{
|
@@ -328,6 +329,7 @@ void initialize_imputer(Imputer &imputer, InputData &input_data, size_t ntrees,
|
|
328
329
|
cnt -= is_na_or_inf(input_data.numeric_data[row + offset]);
|
329
330
|
}
|
330
331
|
imputer.col_means[col] /= (ldouble_safe) cnt;
|
332
|
+
if (!cnt) imputer.col_means[col] = NAN;
|
331
333
|
}
|
332
334
|
}
|
333
335
|
|
@@ -344,6 +346,7 @@ void initialize_imputer(Imputer &imputer, InputData &input_data, size_t ntrees,
|
|
344
346
|
cnt -= is_na_or_inf(input_data.Xc[ix]);
|
345
347
|
}
|
346
348
|
imputer.col_means[col] /= (ldouble_safe) cnt;
|
349
|
+
if (!cnt) imputer.col_means[col] = NAN;
|
347
350
|
}
|
348
351
|
}
|
349
352
|
|
@@ -624,7 +627,7 @@ void build_impute_node(ImputeNode &imputer, WorkerMemory &workspace,
|
|
624
627
|
imputer.cat_sum[col][cat]
|
625
628
|
+=
|
626
629
|
(imputer_tree[curr_tree].cat_sum[col][cat] > 0)?
|
627
|
-
(imputer_tree[curr_tree].cat_sum[col][cat] /
|
630
|
+
(imputer_tree[curr_tree].cat_sum[col][cat] / imputer_tree[curr_tree].cat_weight[col]) : 0.;
|
628
631
|
imputer.cat_weight[col] = wsum / (double)(2 * look_aboves);
|
629
632
|
}
|
630
633
|
break;
|
@@ -999,6 +1002,11 @@ void apply_imputation_results(std::vector<ImputedData> &impute_vec,
|
|
999
1002
|
}
|
1000
1003
|
|
1001
1004
|
|
1005
|
+
/* TODO: investigate why in the case of all-missing numeric columns the node weights still
|
1006
|
+
get filled when using extended model, then remove the workaround that was added here that
|
1007
|
+
checks if the sum is zero and column is all-nan. Should also modify the earlier code to
|
1008
|
+
remove these cases from the imputation tracking list when doing the imputations on-the-fly
|
1009
|
+
as the model is being fit. */
|
1002
1010
|
template <class PredictionData, class ImputedData>
|
1003
1011
|
void apply_imputation_results(PredictionData &prediction_data,
|
1004
1012
|
ImputedData &imp,
|
@@ -1012,7 +1020,7 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1012
1020
|
for (size_t ix = 0; ix < imp.n_missing_num; ix++)
|
1013
1021
|
{
|
1014
1022
|
col = imp.missing_num[ix];
|
1015
|
-
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]))
|
1023
|
+
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]) && !(imp.num_sum[ix] == 0 && std::isnan(imputer.col_means[col])))
|
1016
1024
|
prediction_data.numeric_data[row + col * prediction_data.nrows]
|
1017
1025
|
=
|
1018
1026
|
imp.num_sum[ix] / imp.num_weight[ix];
|
@@ -1028,7 +1036,7 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1028
1036
|
for (size_t ix = 0; ix < imp.n_missing_num; ix++)
|
1029
1037
|
{
|
1030
1038
|
col = imp.missing_num[ix];
|
1031
|
-
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]))
|
1039
|
+
if (imp.num_weight[ix] > 0 && !is_na_or_inf(imp.num_sum[ix]) && !(imp.num_sum[ix] == 0 && std::isnan(imputer.col_means[col])))
|
1032
1040
|
prediction_data.numeric_data[col + row * imputer.ncols_numeric]
|
1033
1041
|
=
|
1034
1042
|
imp.num_sum[ix] / imp.num_weight[ix];
|
@@ -1066,10 +1074,20 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1066
1074
|
std::distance(imp.cat_sum[col].begin(),
|
1067
1075
|
std::max_element(imp.cat_sum[col].begin(), imp.cat_sum[col].end()));
|
1068
1076
|
|
1069
|
-
if (prediction_data.categ_data[row + col * prediction_data.nrows] == 0
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1077
|
+
if (prediction_data.categ_data[row + col * prediction_data.nrows] == 0)
|
1078
|
+
{
|
1079
|
+
if (imp.cat_sum.empty() || imp.cat_sum[col].empty())
|
1080
|
+
{
|
1081
|
+
prediction_data.categ_data[row + col * prediction_data.nrows] = -1;
|
1082
|
+
}
|
1083
|
+
|
1084
|
+
else if (imp.cat_sum[col][0] <= 0)
|
1085
|
+
{
|
1086
|
+
prediction_data.categ_data[row + col * prediction_data.nrows]
|
1087
|
+
=
|
1088
|
+
imputer.col_modes[col];
|
1089
|
+
}
|
1090
|
+
}
|
1073
1091
|
}
|
1074
1092
|
}
|
1075
1093
|
|
@@ -1083,10 +1101,20 @@ void apply_imputation_results(PredictionData &prediction_data,
|
|
1083
1101
|
std::distance(imp.cat_sum[col].begin(),
|
1084
1102
|
std::max_element(imp.cat_sum[col].begin(), imp.cat_sum[col].end()));
|
1085
1103
|
|
1086
|
-
if (prediction_data.categ_data[col + row * imputer.ncols_categ] == 0
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1104
|
+
if (prediction_data.categ_data[col + row * imputer.ncols_categ] == 0)
|
1105
|
+
{
|
1106
|
+
if (imp.cat_sum.empty() || imp.cat_sum[col].empty())
|
1107
|
+
{
|
1108
|
+
prediction_data.categ_data[col + row * imputer.ncols_categ] = -1;
|
1109
|
+
}
|
1110
|
+
|
1111
|
+
else if (imp.cat_sum[col][0] <= 0)
|
1112
|
+
{
|
1113
|
+
prediction_data.categ_data[col + row * imputer.ncols_categ]
|
1114
|
+
=
|
1115
|
+
imputer.col_modes[col];
|
1116
|
+
}
|
1117
|
+
}
|
1090
1118
|
}
|
1091
1119
|
}
|
1092
1120
|
}
|
@@ -109,15 +109,13 @@
|
|
109
109
|
extern "C" {
|
110
110
|
#include <R_ext/Print.h>
|
111
111
|
}
|
112
|
-
#define
|
113
|
-
#define fprintf(f, message) REprintf(message)
|
112
|
+
#define print_errmsg(msg) REprintf("%s", msg)
|
114
113
|
#elif defined(_FOR_PYTHON)
|
115
|
-
extern "C"
|
116
|
-
#define
|
114
|
+
extern "C" int cy_warning(const char *msg);
|
115
|
+
#define print_errmsg cy_warning
|
117
116
|
#else
|
118
117
|
#include <cstdio>
|
119
|
-
|
120
|
-
using std::fprintf;
|
118
|
+
#define print_errmsg(msg) std::fprintf(stderr, "%s", msg)
|
121
119
|
#endif
|
122
120
|
#ifdef _OPENMP
|
123
121
|
#include <omp.h>
|
@@ -285,6 +283,19 @@ using std::memcpy;
|
|
285
283
|
#endif
|
286
284
|
#endif
|
287
285
|
|
286
|
+
/* Makes 'unreachable()' available as a hint that control flow cannot arrive
   at a given point.

   <utility> is included before anything is defined here, and the choice is
   keyed on the library feature-test macro rather than on '__cplusplus'.
   When the standard library already ships 'std::unreachable', that one is
   used; otherwise a compiler intrinsic is used, and as a last resort the
   call expands to nothing. Defining a function-like macro with this name
   while the standard library also declares 'std::unreachable' would break
   any later inclusion of <utility>, which is why the library check comes
   first. */
#ifndef unreachable
#   include <utility>
#   if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
using std::unreachable;
#   elif defined(_MSC_VER)
#       define unreachable() (__assume(false))
#   elif defined(__GNUC__) || defined(__clang__)
#       define unreachable() __builtin_unreachable()
#   else
#       define unreachable()
#   endif
#endif
|
298
|
+
|
288
299
|
|
289
300
|
/* Apple at some point decided to drop OMP library and headers from its compiler distribution
|
290
301
|
* and to alias 'gcc' to 'clang', which work differently when given flags they cannot interpret,
|
@@ -1936,8 +1947,7 @@ public:
|
|
1936
1947
|
{
|
1937
1948
|
if (this->handle) {
|
1938
1949
|
int err = std::fclose(this->handle);
|
1939
|
-
if (err)
|
1940
|
-
fprintf(stderr, "Error: could not close file.\n");
|
1950
|
+
if (err) print_errmsg("Error: could not close file.\n");
|
1941
1951
|
}
|
1942
1952
|
this->handle = NULL;
|
1943
1953
|
}
|
@@ -1960,8 +1970,7 @@ public:
|
|
1960
1970
|
{
|
1961
1971
|
if (this->handle) {
|
1962
1972
|
int err = std::fclose(this->handle);
|
1963
|
-
if (err)
|
1964
|
-
fprintf(stderr, "Error: could not close file.\n");
|
1973
|
+
if (err) print_errmsg("Error: could not close file.\n");
|
1965
1974
|
}
|
1966
1975
|
this->handle = NULL;
|
1967
1976
|
}
|
data/vendor/isotree/src/mult.hpp
CHANGED
@@ -598,7 +598,7 @@ void add_linear_comb_weighted(size_t ix_arr[], size_t st, size_t end, double *re
|
|
598
598
|
w_this = w[ix_arr[row]];
|
599
599
|
res_write[row] = std::fma(x[ix_arr[row]] - x_mean, coef, res_write[row]);
|
600
600
|
obs_weight[cnt] = w_this;
|
601
|
-
buffer_arr[cnt++] =
|
601
|
+
buffer_arr[cnt++] = x[ix_arr[row]];
|
602
602
|
cumw += w_this;
|
603
603
|
}
|
604
604
|
|
@@ -628,14 +628,12 @@ void IsolationForest::check_nthreads()
|
|
628
628
|
#endif
|
629
629
|
}
|
630
630
|
if (nthreads <= 0) {
|
631
|
-
|
631
|
+
print_errmsg("'isotree' got invalid 'nthreads', will set to 1.\n");
|
632
632
|
this->nthreads = 1;
|
633
633
|
}
|
634
634
|
#ifndef _OPENMP
|
635
635
|
else if (nthreads > 1) {
|
636
|
-
|
637
|
-
"Passed nthreads:%d to 'isotree', but library was compiled without multithreading.\n",
|
638
|
-
this->nthreads);
|
636
|
+
print_errmsg("Passed nthreads>1 to 'isotree', but library was compiled without multithreading.\n");
|
639
637
|
this->nthreads = 1;
|
640
638
|
}
|
641
639
|
#endif
|
@@ -787,6 +785,10 @@ IsolationForest IsolationForest::deserialize_template(itype &inp, int nthreads)
|
|
787
785
|
ExtIsoForest model_ext = ExtIsoForest();
|
788
786
|
Imputer imputer = Imputer();
|
789
787
|
TreesIndexer indexer = TreesIndexer();
|
788
|
+
std::unique_ptr<char[]> buffer_metadata;
|
789
|
+
if (size_metadata) {
|
790
|
+
buffer_metadata = std::unique_ptr<char[]>(new char[size_metadata]);
|
791
|
+
}
|
790
792
|
|
791
793
|
deserialize_combined(
|
792
794
|
inp,
|
@@ -794,7 +796,7 @@ IsolationForest IsolationForest::deserialize_template(itype &inp, int nthreads)
|
|
794
796
|
&model_ext,
|
795
797
|
&imputer,
|
796
798
|
&indexer,
|
797
|
-
(char*)nullptr
|
799
|
+
size_metadata? buffer_metadata.get() : (char*)nullptr
|
798
800
|
);
|
799
801
|
|
800
802
|
if (model.trees.empty() && model_ext.hplanes.empty())
|
@@ -92,36 +92,6 @@ TreesIndexer get_Indexer()
|
|
92
92
|
return TreesIndexer();
|
93
93
|
}
|
94
94
|
|
95
|
-
/* Reason behind these functions: Cython (as of v0.29) will not auto-deallocate
|
96
|
-
structs which are part of a cdef'd class, which produces a memory leak
|
97
|
-
but can be force-destructed. Unfortunately, Cython itself doesn't even
|
98
|
-
allow calling destructors for structs, so it has to be done externally.
|
99
|
-
These functions should otherwise have no reason to be.
|
100
|
-
|
101
|
-
This is supposed to be already fixed in newer Cython versions:
|
102
|
-
https://github.com/cython/cython/issues/3226
|
103
|
-
But is not yet available in the relase versions at the time of writing */
|
104
|
-
|
105
|
-
void dealloc_IsoForest(IsoForest &model_outputs)
|
106
|
-
{
|
107
|
-
model_outputs.~IsoForest();
|
108
|
-
}
|
109
|
-
|
110
|
-
void dealloc_IsoExtForest(ExtIsoForest &model_outputs_ext)
|
111
|
-
{
|
112
|
-
model_outputs_ext.~ExtIsoForest();
|
113
|
-
}
|
114
|
-
|
115
|
-
void dealloc_Imputer(Imputer &imputer)
|
116
|
-
{
|
117
|
-
imputer.~Imputer();
|
118
|
-
}
|
119
|
-
|
120
|
-
void dealloc_Indexer(TreesIndexer &indexer)
|
121
|
-
{
|
122
|
-
indexer.~TreesIndexer();
|
123
|
-
}
|
124
|
-
|
125
95
|
bool get_has_openmp(void)
|
126
96
|
{
|
127
97
|
#ifdef _OPENMP
|
@@ -131,4 +101,14 @@ bool get_has_openmp(void)
|
|
131
101
|
#endif
|
132
102
|
}
|
133
103
|
|
104
|
+
/* Length, excluding the terminating null character, of the message that
   'strerror' reports for the current value of 'errno'. */
size_t py_strerrorlen_s()
{
    const char *msg = strerror(errno);
    return strlen(msg);
}
|
108
|
+
|
109
|
+
/* Copies the message that 'strerror' reports for the current value of 'errno'
   into 'inp', including the terminating null character.

   The copy is unbounded: 'inp' must have room for the whole message plus the
   terminator. Presumably callers size it from 'py_strerrorlen_s()' + 1 -
   verify against the caller. */
void copy_errno_msg(char *inp)
{
    const char *msg = strerror(errno);
    strcpy(inp, msg);
}
|
113
|
+
|
134
114
|
#endif
|
@@ -90,7 +90,7 @@ target_link_libraries(your_target PRIVATE tsl::robin_map)
|
|
90
90
|
|
91
91
|
If the project has been installed through `make install`, you can also use `find_package(tsl-robin-map REQUIRED)` instead of `add_subdirectory`.
|
92
92
|
|
93
|
-
The library is available in [vcpkg](https://github.com/Microsoft/vcpkg/tree/master/ports/robin-map) and [conan](https://
|
93
|
+
The library is available in [vcpkg](https://github.com/Microsoft/vcpkg/tree/master/ports/robin-map) and [conan](https://conan.io/center/tsl-robin-map). It's also present in [Debian](https://packages.debian.org/buster/robin-map-dev), [Ubuntu](https://packages.ubuntu.com/disco/robin-map-dev) and [Fedora](https://apps.fedoraproject.org/packages/robin-map-devel) package repositories.
|
94
94
|
|
95
95
|
The code should work with any C++11 standard-compliant compiler and has been tested with GCC 4.8.4, Clang 3.5.0 and Visual Studio 2015.
|
96
96
|
|