isotree 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/LICENSE.txt +2 -2
- data/README.md +32 -14
- data/ext/isotree/ext.cpp +144 -31
- data/ext/isotree/extconf.rb +7 -7
- data/lib/isotree/isolation_forest.rb +110 -30
- data/lib/isotree/version.rb +1 -1
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +165 -27
- data/vendor/isotree/include/isotree.hpp +2111 -0
- data/vendor/isotree/include/isotree_oop.hpp +394 -0
- data/vendor/isotree/inst/COPYRIGHTS +62 -0
- data/vendor/isotree/src/RcppExports.cpp +525 -52
- data/vendor/isotree/src/Rwrapper.cpp +1931 -268
- data/vendor/isotree/src/c_interface.cpp +953 -0
- data/vendor/isotree/src/crit.hpp +4232 -0
- data/vendor/isotree/src/dist.hpp +1886 -0
- data/vendor/isotree/src/exp_depth_table.hpp +134 -0
- data/vendor/isotree/src/extended.hpp +1444 -0
- data/vendor/isotree/src/external_facing_generic.hpp +399 -0
- data/vendor/isotree/src/fit_model.hpp +2401 -0
- data/vendor/isotree/src/{dealloc.cpp → headers_joined.hpp} +38 -22
- data/vendor/isotree/src/helpers_iforest.hpp +813 -0
- data/vendor/isotree/src/{impute.cpp → impute.hpp} +353 -122
- data/vendor/isotree/src/indexer.cpp +515 -0
- data/vendor/isotree/src/instantiate_template_headers.cpp +118 -0
- data/vendor/isotree/src/instantiate_template_headers.hpp +240 -0
- data/vendor/isotree/src/isoforest.hpp +1659 -0
- data/vendor/isotree/src/isotree.hpp +1804 -392
- data/vendor/isotree/src/isotree_exportable.hpp +99 -0
- data/vendor/isotree/src/merge_models.cpp +159 -16
- data/vendor/isotree/src/mult.hpp +1321 -0
- data/vendor/isotree/src/oop_interface.cpp +842 -0
- data/vendor/isotree/src/oop_interface.hpp +278 -0
- data/vendor/isotree/src/other_helpers.hpp +219 -0
- data/vendor/isotree/src/predict.hpp +1932 -0
- data/vendor/isotree/src/python_helpers.hpp +134 -0
- data/vendor/isotree/src/ref_indexer.hpp +154 -0
- data/vendor/isotree/src/robinmap/LICENSE +21 -0
- data/vendor/isotree/src/robinmap/README.md +483 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_growth_policy.h +406 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_hash.h +1620 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_map.h +807 -0
- data/vendor/isotree/src/robinmap/include/tsl/robin_set.h +660 -0
- data/vendor/isotree/src/serialize.cpp +4300 -139
- data/vendor/isotree/src/sql.cpp +141 -59
- data/vendor/isotree/src/subset_models.cpp +174 -0
- data/vendor/isotree/src/utils.hpp +3808 -0
- data/vendor/isotree/src/xoshiro.hpp +467 -0
- data/vendor/isotree/src/ziggurat.hpp +405 -0
- metadata +38 -104
- data/vendor/cereal/LICENSE +0 -24
- data/vendor/cereal/README.md +0 -85
- data/vendor/cereal/include/cereal/access.hpp +0 -351
- data/vendor/cereal/include/cereal/archives/adapters.hpp +0 -163
- data/vendor/cereal/include/cereal/archives/binary.hpp +0 -169
- data/vendor/cereal/include/cereal/archives/json.hpp +0 -1019
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +0 -334
- data/vendor/cereal/include/cereal/archives/xml.hpp +0 -956
- data/vendor/cereal/include/cereal/cereal.hpp +0 -1089
- data/vendor/cereal/include/cereal/details/helpers.hpp +0 -422
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +0 -796
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +0 -65
- data/vendor/cereal/include/cereal/details/static_object.hpp +0 -127
- data/vendor/cereal/include/cereal/details/traits.hpp +0 -1411
- data/vendor/cereal/include/cereal/details/util.hpp +0 -84
- data/vendor/cereal/include/cereal/external/base64.hpp +0 -134
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +0 -284
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +0 -2652
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +0 -299
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +0 -716
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +0 -74
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +0 -161
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +0 -99
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +0 -104
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +0 -151
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +0 -271
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +0 -245
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +0 -78
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +0 -308
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +0 -186
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +0 -55
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +0 -740
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +0 -232
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +0 -69
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +0 -290
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +0 -46
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +0 -128
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +0 -70
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +0 -71
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +0 -316
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +0 -300
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +0 -81
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +0 -1414
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +0 -277
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +0 -656
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +0 -2230
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +0 -2497
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +0 -223
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +0 -121
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +0 -709
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +0 -52
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +0 -406
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +0 -2624
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +0 -175
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +0 -428
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +0 -123
- data/vendor/cereal/include/cereal/macros.hpp +0 -154
- data/vendor/cereal/include/cereal/specialize.hpp +0 -139
- data/vendor/cereal/include/cereal/types/array.hpp +0 -79
- data/vendor/cereal/include/cereal/types/atomic.hpp +0 -55
- data/vendor/cereal/include/cereal/types/base_class.hpp +0 -203
- data/vendor/cereal/include/cereal/types/bitset.hpp +0 -176
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +0 -164
- data/vendor/cereal/include/cereal/types/chrono.hpp +0 -72
- data/vendor/cereal/include/cereal/types/common.hpp +0 -129
- data/vendor/cereal/include/cereal/types/complex.hpp +0 -56
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +0 -73
- data/vendor/cereal/include/cereal/types/deque.hpp +0 -62
- data/vendor/cereal/include/cereal/types/forward_list.hpp +0 -68
- data/vendor/cereal/include/cereal/types/functional.hpp +0 -43
- data/vendor/cereal/include/cereal/types/list.hpp +0 -62
- data/vendor/cereal/include/cereal/types/map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/memory.hpp +0 -425
- data/vendor/cereal/include/cereal/types/optional.hpp +0 -66
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +0 -483
- data/vendor/cereal/include/cereal/types/queue.hpp +0 -132
- data/vendor/cereal/include/cereal/types/set.hpp +0 -103
- data/vendor/cereal/include/cereal/types/stack.hpp +0 -76
- data/vendor/cereal/include/cereal/types/string.hpp +0 -61
- data/vendor/cereal/include/cereal/types/tuple.hpp +0 -123
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +0 -36
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +0 -99
- data/vendor/cereal/include/cereal/types/utility.hpp +0 -47
- data/vendor/cereal/include/cereal/types/valarray.hpp +0 -89
- data/vendor/cereal/include/cereal/types/variant.hpp +0 -109
- data/vendor/cereal/include/cereal/types/vector.hpp +0 -112
- data/vendor/cereal/include/cereal/version.hpp +0 -52
- data/vendor/isotree/src/Makevars +0 -4
- data/vendor/isotree/src/crit.cpp +0 -912
- data/vendor/isotree/src/dist.cpp +0 -749
- data/vendor/isotree/src/extended.cpp +0 -790
- data/vendor/isotree/src/fit_model.cpp +0 -1090
- data/vendor/isotree/src/helpers_iforest.cpp +0 -324
- data/vendor/isotree/src/isoforest.cpp +0 -771
- data/vendor/isotree/src/mult.cpp +0 -607
- data/vendor/isotree/src/predict.cpp +0 -853
- data/vendor/isotree/src/utils.cpp +0 -1566
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a2ed0745b24db6c7d55b86cbf60394de590225509f3be5dbc79934606cd402cb
|
|
4
|
+
data.tar.gz: d1601d80e3878cc678544a245defd04831e6f11f4f374ecff6d1916e1618af6e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: b461f501114e56810ed6075fba28ae79a611763838147670f357a8a9645000f139072bf3bb3a2120cc09ad302df429d39f6b67522b1c2bc18862e75a65266eb4
|
|
7
|
+
data.tar.gz: 57fc3f334fac2a6918ef95fb4827a67543a2aa5c1cdfcd86225c3411a409348593764109bcbc5155ddd6e49217f0764686847265550d145e72007556ae7fd00e
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,10 @@
|
|
|
1
|
+
## 0.3.0 (2022-06-13)
|
|
2
|
+
|
|
3
|
+
- Updated IsoTree to 0.5.16
|
|
4
|
+
- Updated serialization format (exported models must be recreated)
|
|
5
|
+
- Dropped support for Ruby < 2.7
|
|
6
|
+
- Dropped support for Windows
|
|
7
|
+
|
|
1
8
|
## 0.2.2 (2022-06-12)
|
|
2
9
|
|
|
3
10
|
- Fixed segfault when data is smaller than sample size
|
|
@@ -13,7 +20,7 @@
|
|
|
13
20
|
|
|
14
21
|
## 0.1.5 (2021-03-14)
|
|
15
22
|
|
|
16
|
-
- Updated
|
|
23
|
+
- Updated IsoTree to 0.1.25
|
|
17
24
|
- Added support for exporting and importing models
|
|
18
25
|
|
|
19
26
|
## 0.1.4 (2020-08-22)
|
data/LICENSE.txt
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
BSD 2-Clause License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2020, David Cortes
|
|
4
|
-
Copyright (c) 2020-
|
|
3
|
+
Copyright (c) 2020-2022, David Cortes
|
|
4
|
+
Copyright (c) 2020-2022, Andrew Kane
|
|
5
5
|
All rights reserved.
|
|
6
6
|
|
|
7
7
|
Redistribution and use in source and binary forms, with or without
|
data/README.md
CHANGED
|
@@ -16,6 +16,8 @@ Add this line to your application’s Gemfile:
|
|
|
16
16
|
gem "isotree"
|
|
17
17
|
```
|
|
18
18
|
|
|
19
|
+
Windows is not supported at the moment
|
|
20
|
+
|
|
19
21
|
## Getting Started
|
|
20
22
|
|
|
21
23
|
Prep your data
|
|
@@ -24,7 +26,8 @@ Prep your data
|
|
|
24
26
|
data = [
|
|
25
27
|
{department: "Books", sale: false, price: 2.50},
|
|
26
28
|
{department: "Books", sale: true, price: 3.00},
|
|
27
|
-
{department: "Movies", sale: false, price: 5.00}
|
|
29
|
+
{department: "Movies", sale: false, price: 5.00},
|
|
30
|
+
# ...
|
|
28
31
|
]
|
|
29
32
|
```
|
|
30
33
|
|
|
@@ -61,28 +64,38 @@ Pass parameters - default values below
|
|
|
61
64
|
|
|
62
65
|
```ruby
|
|
63
66
|
IsoTree::IsolationForest.new(
|
|
64
|
-
sample_size:
|
|
67
|
+
sample_size: "auto",
|
|
65
68
|
ntrees: 500,
|
|
66
69
|
ndim: 3,
|
|
67
|
-
ntry:
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
70
|
+
ntry: 1,
|
|
71
|
+
max_depth: "auto",
|
|
72
|
+
ncols_per_tree: nil,
|
|
73
|
+
prob_pick_pooled_gain: 0.0,
|
|
74
|
+
prob_pick_avg_gain: 0.0,
|
|
75
|
+
prob_pick_full_gain: 0.0,
|
|
76
|
+
prob_pick_dens: 0.0,
|
|
77
|
+
prob_pick_col_by_range: 0.0,
|
|
78
|
+
prob_pick_col_by_var: 0.0,
|
|
79
|
+
prob_pick_col_by_kurt: 0.0,
|
|
80
|
+
min_gain: 0.0,
|
|
81
|
+
missing_action: "auto",
|
|
82
|
+
new_categ_action: "auto",
|
|
83
|
+
categ_split_type: "auto",
|
|
76
84
|
all_perm: false,
|
|
77
85
|
coef_by_prop: false,
|
|
78
86
|
sample_with_replacement: false,
|
|
79
|
-
penalize_range:
|
|
87
|
+
penalize_range: false,
|
|
88
|
+
standardize_data: true,
|
|
89
|
+
scoring_metric: "depth",
|
|
90
|
+
fast_bratio: true,
|
|
80
91
|
weigh_by_kurtosis: false,
|
|
81
|
-
coefs: "
|
|
92
|
+
coefs: "uniform",
|
|
93
|
+
assume_full_distr: true,
|
|
82
94
|
min_imp_obs: 3,
|
|
83
95
|
depth_imp: "higher",
|
|
84
96
|
weigh_imp_rows: "inverse",
|
|
85
97
|
random_seed: 1,
|
|
98
|
+
use_long_double: false,
|
|
86
99
|
nthreads: -1
|
|
87
100
|
)
|
|
88
101
|
```
|
|
@@ -134,7 +147,6 @@ Check out [Trove](https://github.com/ankane/trove) for deploying models.
|
|
|
134
147
|
|
|
135
148
|
```sh
|
|
136
149
|
trove push model.bin
|
|
137
|
-
trove push model.bin.metadata
|
|
138
150
|
```
|
|
139
151
|
|
|
140
152
|
## Reference
|
|
@@ -145,6 +157,12 @@ Get the average isolation depth
|
|
|
145
157
|
model.predict(data, output: "avg_depth")
|
|
146
158
|
```
|
|
147
159
|
|
|
160
|
+
## Upgrading
|
|
161
|
+
|
|
162
|
+
### 0.3.0
|
|
163
|
+
|
|
164
|
+
This version uses IsoTree’s new serialization format. Exported models must be recreated.
|
|
165
|
+
|
|
148
166
|
## History
|
|
149
167
|
|
|
150
168
|
View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
|
data/ext/isotree/ext.cpp
CHANGED
|
@@ -1,3 +1,8 @@
|
|
|
1
|
+
// stdlib
|
|
2
|
+
#include <cmath>
|
|
3
|
+
#include <fstream>
|
|
4
|
+
#include <iostream>
|
|
5
|
+
|
|
1
6
|
// isotree
|
|
2
7
|
#include <isotree.hpp>
|
|
3
8
|
|
|
@@ -22,7 +27,7 @@ namespace Rice::detail
|
|
|
22
27
|
NewCategAction convert(VALUE x)
|
|
23
28
|
{
|
|
24
29
|
auto value = Object(x).to_s().str();
|
|
25
|
-
if (value == "weighted") return Weighted;
|
|
30
|
+
if (value == "weighted" || value == "impute") return Weighted;
|
|
26
31
|
if (value == "smallest") return Smallest;
|
|
27
32
|
if (value == "random") return Random;
|
|
28
33
|
throw std::runtime_error("Unknown new categ action: " + value);
|
|
@@ -96,6 +101,24 @@ namespace Rice::detail
|
|
|
96
101
|
throw std::runtime_error("Unknown weight imp rows: " + value);
|
|
97
102
|
}
|
|
98
103
|
};
|
|
104
|
+
|
|
105
|
+
template<>
|
|
106
|
+
class From_Ruby<ScoringMetric>
|
|
107
|
+
{
|
|
108
|
+
public:
|
|
109
|
+
ScoringMetric convert(VALUE x)
|
|
110
|
+
{
|
|
111
|
+
auto value = Object(x).to_s().str();
|
|
112
|
+
if (value == "depth") return Depth;
|
|
113
|
+
if (value == "adj_depth") return AdjDepth;
|
|
114
|
+
if (value == "density") return Density;
|
|
115
|
+
if (value == "adj_density") return AdjDensity;
|
|
116
|
+
if (value == "boxed_density") return BoxedDensity;
|
|
117
|
+
if (value == "boxed_density2") return BoxedDensity2;
|
|
118
|
+
if (value == "boxed_ratio") return BoxedRatio;
|
|
119
|
+
throw std::runtime_error("Unknown scoring metric: " + value);
|
|
120
|
+
}
|
|
121
|
+
};
|
|
99
122
|
}
|
|
100
123
|
|
|
101
124
|
extern "C"
|
|
@@ -118,20 +141,20 @@ void Init_ext()
|
|
|
118
141
|
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
|
119
142
|
size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
|
120
143
|
|
|
121
|
-
|
|
144
|
+
real_t* numeric_data = NULL;
|
|
122
145
|
if (ncols_numeric > 0) {
|
|
123
146
|
numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
|
|
124
147
|
}
|
|
125
148
|
|
|
126
|
-
int
|
|
127
|
-
int
|
|
149
|
+
int* categorical_data = NULL;
|
|
150
|
+
int* ncat = NULL;
|
|
128
151
|
if (ncols_categ > 0) {
|
|
129
152
|
categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
|
|
130
153
|
ncat = (int*) options.get<String, Symbol>("ncat").c_str();
|
|
131
154
|
}
|
|
132
155
|
|
|
133
156
|
// not used (sparse matrices)
|
|
134
|
-
|
|
157
|
+
real_t* Xc = NULL;
|
|
135
158
|
sparse_ix* Xc_ind = NULL;
|
|
136
159
|
sparse_ix* Xc_indptr = NULL;
|
|
137
160
|
|
|
@@ -142,9 +165,7 @@ void Init_ext()
|
|
|
142
165
|
size_t ntrees = options.get<size_t, Symbol>("ntrees");
|
|
143
166
|
size_t ntry = options.get<size_t, Symbol>("ntry");
|
|
144
167
|
double prob_pick_by_gain_avg = options.get<double, Symbol>("prob_pick_avg_gain");
|
|
145
|
-
double prob_split_by_gain_avg = options.get<double, Symbol>("prob_split_avg_gain");
|
|
146
168
|
double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
|
|
147
|
-
double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
|
|
148
169
|
double min_gain = options.get<double, Symbol>("min_gain");
|
|
149
170
|
MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
|
|
150
171
|
CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
|
|
@@ -159,21 +180,31 @@ void Init_ext()
|
|
|
159
180
|
UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
|
|
160
181
|
WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
|
|
161
182
|
uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
|
|
183
|
+
bool use_long_double = options.get<bool, Symbol>("use_long_double");
|
|
162
184
|
int nthreads = options.get<int, Symbol>("nthreads");
|
|
163
185
|
|
|
164
186
|
// TODO options
|
|
165
187
|
double* sample_weights = NULL;
|
|
166
|
-
bool weight_as_sample =
|
|
167
|
-
size_t max_depth =
|
|
168
|
-
bool limit_depth =
|
|
188
|
+
bool weight_as_sample = options.get<bool, Symbol>("weights_as_sample_prob");
|
|
189
|
+
size_t max_depth = options.get<size_t, Symbol>("max_depth");
|
|
190
|
+
bool limit_depth = options.get<bool, Symbol>("limit_depth");
|
|
169
191
|
bool standardize_dist = false;
|
|
170
192
|
double* tmat = NULL;
|
|
171
193
|
double* output_depths = NULL;
|
|
172
194
|
bool standardize_depth = false;
|
|
173
|
-
|
|
174
|
-
Imputer
|
|
195
|
+
real_t* col_weights = NULL;
|
|
196
|
+
Imputer* imputer = NULL;
|
|
175
197
|
bool impute_at_fit = false;
|
|
176
|
-
|
|
198
|
+
|
|
199
|
+
int ncols_per_tree = options.get<int, Symbol>("ncols_per_tree");
|
|
200
|
+
bool standardize_data = options.get<bool, Symbol>("standardize_data");
|
|
201
|
+
ScoringMetric scoring_metric = options.get<ScoringMetric, Symbol>("scoring_metric");
|
|
202
|
+
bool fast_bratio = options.get<bool, Symbol>("fast_bratio");
|
|
203
|
+
double prob_pick_by_full_gain = options.get<double, Symbol>("prob_pick_full_gain");
|
|
204
|
+
double prob_pick_by_dens = options.get<double, Symbol>("prob_pick_dens");
|
|
205
|
+
double prob_pick_col_by_range = options.get<double, Symbol>("prob_pick_col_by_range");
|
|
206
|
+
double prob_pick_col_by_var = options.get<double, Symbol>("prob_pick_col_by_var");
|
|
207
|
+
double prob_pick_col_by_kurt = options.get<double, Symbol>("prob_pick_col_by_kurt");
|
|
177
208
|
|
|
178
209
|
fit_iforest(
|
|
179
210
|
NULL,
|
|
@@ -197,18 +228,25 @@ void Init_ext()
|
|
|
197
228
|
sample_size,
|
|
198
229
|
ntrees,
|
|
199
230
|
max_depth,
|
|
231
|
+
ncols_per_tree,
|
|
200
232
|
limit_depth,
|
|
201
233
|
penalize_range,
|
|
234
|
+
standardize_data,
|
|
235
|
+
scoring_metric,
|
|
236
|
+
fast_bratio,
|
|
202
237
|
standardize_dist,
|
|
203
238
|
tmat,
|
|
204
239
|
output_depths,
|
|
205
240
|
standardize_depth,
|
|
206
241
|
col_weights,
|
|
207
242
|
weigh_by_kurt,
|
|
208
|
-
prob_pick_by_gain_avg,
|
|
209
|
-
prob_split_by_gain_avg,
|
|
210
243
|
prob_pick_by_gain_pl,
|
|
211
|
-
|
|
244
|
+
prob_pick_by_gain_avg,
|
|
245
|
+
prob_pick_by_full_gain,
|
|
246
|
+
prob_pick_by_dens,
|
|
247
|
+
prob_pick_col_by_range,
|
|
248
|
+
prob_pick_col_by_var,
|
|
249
|
+
prob_pick_col_by_kurt,
|
|
212
250
|
min_gain,
|
|
213
251
|
missing_action,
|
|
214
252
|
cat_split_type,
|
|
@@ -220,7 +258,7 @@ void Init_ext()
|
|
|
220
258
|
weigh_imp_rows,
|
|
221
259
|
impute_at_fit,
|
|
222
260
|
random_seed,
|
|
223
|
-
|
|
261
|
+
use_long_double,
|
|
224
262
|
nthreads
|
|
225
263
|
);
|
|
226
264
|
|
|
@@ -234,21 +272,21 @@ void Init_ext()
|
|
|
234
272
|
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
|
235
273
|
size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
|
236
274
|
|
|
237
|
-
|
|
275
|
+
real_t* numeric_data = NULL;
|
|
238
276
|
if (ncols_numeric > 0) {
|
|
239
277
|
numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
|
|
240
278
|
}
|
|
241
279
|
|
|
242
|
-
int
|
|
280
|
+
int* categorical_data = NULL;
|
|
243
281
|
if (ncols_categ > 0) {
|
|
244
282
|
categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
|
|
245
283
|
}
|
|
246
284
|
|
|
247
285
|
// not used (sparse matrices)
|
|
248
|
-
|
|
286
|
+
real_t* Xc = NULL;
|
|
249
287
|
sparse_ix* Xc_ind = NULL;
|
|
250
288
|
sparse_ix* Xc_indptr = NULL;
|
|
251
|
-
|
|
289
|
+
real_t* Xr = NULL;
|
|
252
290
|
sparse_ix* Xr_ind = NULL;
|
|
253
291
|
sparse_ix* Xr_indptr = NULL;
|
|
254
292
|
|
|
@@ -257,10 +295,17 @@ void Init_ext()
|
|
|
257
295
|
bool standardize = options.get<bool, Symbol>("standardize");
|
|
258
296
|
std::vector<double> outlier_scores(nrows);
|
|
259
297
|
sparse_ix* tree_num = NULL;
|
|
298
|
+
bool is_col_major = true;
|
|
299
|
+
size_t ld_numeric = 0;
|
|
300
|
+
size_t ld_categ = 0;
|
|
301
|
+
double* per_tree_depths = NULL;
|
|
260
302
|
|
|
261
303
|
predict_iforest(
|
|
262
304
|
numeric_data,
|
|
263
305
|
categorical_data,
|
|
306
|
+
is_col_major,
|
|
307
|
+
ld_numeric,
|
|
308
|
+
ld_categ,
|
|
264
309
|
Xc,
|
|
265
310
|
Xc_ind,
|
|
266
311
|
Xc_indptr,
|
|
@@ -273,7 +318,9 @@ void Init_ext()
|
|
|
273
318
|
NULL,
|
|
274
319
|
&iso,
|
|
275
320
|
outlier_scores.data(),
|
|
276
|
-
tree_num
|
|
321
|
+
tree_num,
|
|
322
|
+
per_tree_depths,
|
|
323
|
+
NULL
|
|
277
324
|
);
|
|
278
325
|
|
|
279
326
|
Array ret;
|
|
@@ -283,27 +330,93 @@ void Init_ext()
|
|
|
283
330
|
return ret;
|
|
284
331
|
})
|
|
285
332
|
.define_singleton_function(
|
|
286
|
-
"
|
|
287
|
-
[](ExtIsoForest& iso, String path) {
|
|
333
|
+
"serialize_combined",
|
|
334
|
+
[](ExtIsoForest& iso, String path, String metadata) {
|
|
288
335
|
#ifdef _MSC_VER
|
|
289
336
|
// TODO convert to wchar_t
|
|
290
337
|
throw std::runtime_error("Not supported on Windows yet");
|
|
291
338
|
#else
|
|
292
|
-
|
|
339
|
+
std::ofstream file;
|
|
340
|
+
file.open(path.c_str());
|
|
341
|
+
serialize_combined(
|
|
342
|
+
NULL,
|
|
343
|
+
&iso,
|
|
344
|
+
NULL,
|
|
345
|
+
NULL,
|
|
346
|
+
metadata.c_str(),
|
|
347
|
+
// returns bytesize (RSTRING_LEN)
|
|
348
|
+
metadata.length(),
|
|
349
|
+
file
|
|
350
|
+
);
|
|
351
|
+
file.close();
|
|
293
352
|
#endif
|
|
294
353
|
})
|
|
295
354
|
.define_singleton_function(
|
|
296
|
-
"
|
|
355
|
+
"deserialize_combined",
|
|
297
356
|
[](String path) {
|
|
298
|
-
ExtIsoForest iso;
|
|
299
|
-
|
|
300
357
|
#ifdef _MSC_VER
|
|
301
358
|
// TODO convert to wchar_t
|
|
302
359
|
throw std::runtime_error("Not supported on Windows yet");
|
|
303
360
|
#else
|
|
304
|
-
|
|
305
|
-
#endif
|
|
361
|
+
Array ret;
|
|
306
362
|
|
|
307
|
-
|
|
363
|
+
std::ifstream file;
|
|
364
|
+
file.open(path.c_str(), std::ios_base::in | std::ios_base::binary);
|
|
365
|
+
if (!file) {
|
|
366
|
+
throw std::runtime_error("Cannot open file");
|
|
367
|
+
}
|
|
368
|
+
|
|
369
|
+
bool is_isotree_model = false;
|
|
370
|
+
bool is_compatible = false;
|
|
371
|
+
bool has_combined_objects = false;
|
|
372
|
+
bool has_IsoForest = false;
|
|
373
|
+
bool has_ExtIsoForest = false;
|
|
374
|
+
bool has_Imputer = false;
|
|
375
|
+
bool has_Indexer = false;
|
|
376
|
+
bool has_metadata = false;
|
|
377
|
+
size_t size_metadata = 0;
|
|
378
|
+
|
|
379
|
+
inspect_serialized_object(
|
|
380
|
+
file,
|
|
381
|
+
is_isotree_model,
|
|
382
|
+
is_compatible,
|
|
383
|
+
has_combined_objects,
|
|
384
|
+
has_IsoForest,
|
|
385
|
+
has_ExtIsoForest,
|
|
386
|
+
has_Imputer,
|
|
387
|
+
has_Indexer,
|
|
388
|
+
has_metadata,
|
|
389
|
+
size_metadata
|
|
390
|
+
);
|
|
391
|
+
|
|
392
|
+
if (!is_isotree_model || !has_combined_objects) {
|
|
393
|
+
throw std::runtime_error("Input file is not a serialized isotree model");
|
|
394
|
+
}
|
|
395
|
+
if (!is_compatible) {
|
|
396
|
+
throw std::runtime_error("Model file format is incompatible");
|
|
397
|
+
}
|
|
398
|
+
if (size_metadata == 0) {
|
|
399
|
+
throw std::runtime_error("Input file does not contain metadata");
|
|
400
|
+
}
|
|
401
|
+
|
|
402
|
+
IsoForest model = IsoForest();
|
|
403
|
+
ExtIsoForest model_ext = ExtIsoForest();
|
|
404
|
+
Imputer imputer = Imputer();
|
|
405
|
+
TreesIndexer indexer = TreesIndexer();
|
|
406
|
+
char *optional_metadata = (char*) calloc(size_metadata, sizeof(char));
|
|
407
|
+
if (optional_metadata == NULL) {
|
|
408
|
+
throw std::runtime_error("Cannot allocate memory");
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata);
|
|
412
|
+
file.close();
|
|
413
|
+
|
|
414
|
+
ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)));
|
|
415
|
+
ret.push(String(std::string(optional_metadata, size_metadata)));
|
|
416
|
+
|
|
417
|
+
free(optional_metadata);
|
|
418
|
+
|
|
419
|
+
return ret;
|
|
420
|
+
#endif
|
|
308
421
|
});
|
|
309
422
|
}
|
data/ext/isotree/extconf.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
require "mkmf-rice"
|
|
2
2
|
|
|
3
|
-
$CXXFLAGS += " -std=c++17 $(optflags) -
|
|
3
|
+
$CXXFLAGS += " -std=c++17 $(optflags) -D_USE_XOSHIRO -DSUPPORTS_RESTRICT=1 -D_USE_ROBIN_MAP -DDONT_THROW_ON_INTERRUPT"
|
|
4
4
|
|
|
5
5
|
apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
|
|
6
6
|
|
|
@@ -11,12 +11,12 @@ if have_library("omp") || have_library("gomp")
|
|
|
11
11
|
end
|
|
12
12
|
|
|
13
13
|
ext = File.expand_path(".", __dir__)
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
isotree_src = File.expand_path("../../vendor/isotree/src", __dir__)
|
|
15
|
+
isotree_inc = File.expand_path("../../vendor/isotree/include", __dir__)
|
|
16
16
|
|
|
17
|
-
exclude = %w(Rwrapper.cpp RcppExports.cpp)
|
|
18
|
-
$srcs = Dir["{#{ext},#{
|
|
19
|
-
$INCFLAGS << " -I#{
|
|
20
|
-
$VPATH <<
|
|
17
|
+
exclude = %w(c_interface.cpp Rwrapper.cpp RcppExports.cpp)
|
|
18
|
+
$srcs = Dir["{#{ext},#{isotree_src}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
|
|
19
|
+
$INCFLAGS << " -I#{isotree_inc}"
|
|
20
|
+
$VPATH << isotree_src
|
|
21
21
|
|
|
22
22
|
create_makefile("isotree/ext")
|