isotree 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/LICENSE.txt +2 -1
- data/README.md +57 -6
- data/ext/isotree/ext.cpp +170 -39
- data/ext/isotree/extconf.rb +3 -3
- data/lib/isotree.rb +2 -0
- data/lib/isotree/dataset.rb +73 -0
- data/lib/isotree/isolation_forest.rb +182 -29
- data/lib/isotree/version.rb +1 -1
- data/vendor/cereal/LICENSE +24 -0
- data/vendor/cereal/README.md +85 -0
- data/vendor/cereal/include/cereal/access.hpp +351 -0
- data/vendor/cereal/include/cereal/archives/adapters.hpp +163 -0
- data/vendor/cereal/include/cereal/archives/binary.hpp +169 -0
- data/vendor/cereal/include/cereal/archives/json.hpp +1019 -0
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +334 -0
- data/vendor/cereal/include/cereal/archives/xml.hpp +956 -0
- data/vendor/cereal/include/cereal/cereal.hpp +1089 -0
- data/vendor/cereal/include/cereal/details/helpers.hpp +422 -0
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +796 -0
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +65 -0
- data/vendor/cereal/include/cereal/details/static_object.hpp +127 -0
- data/vendor/cereal/include/cereal/details/traits.hpp +1411 -0
- data/vendor/cereal/include/cereal/details/util.hpp +84 -0
- data/vendor/cereal/include/cereal/external/base64.hpp +134 -0
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +284 -0
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +78 -0
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +2652 -0
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +299 -0
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +716 -0
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +74 -0
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +161 -0
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +99 -0
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +104 -0
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +151 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +290 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +271 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +245 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +78 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +308 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +186 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +55 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +740 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +232 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +69 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +290 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +46 -0
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +128 -0
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +70 -0
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +71 -0
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +316 -0
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +300 -0
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +81 -0
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +1414 -0
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +277 -0
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +656 -0
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +2230 -0
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +2497 -0
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +223 -0
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +121 -0
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +709 -0
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +52 -0
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +406 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +2624 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +175 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +428 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +123 -0
- data/vendor/cereal/include/cereal/macros.hpp +154 -0
- data/vendor/cereal/include/cereal/specialize.hpp +139 -0
- data/vendor/cereal/include/cereal/types/array.hpp +79 -0
- data/vendor/cereal/include/cereal/types/atomic.hpp +55 -0
- data/vendor/cereal/include/cereal/types/base_class.hpp +203 -0
- data/vendor/cereal/include/cereal/types/bitset.hpp +176 -0
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +164 -0
- data/vendor/cereal/include/cereal/types/chrono.hpp +72 -0
- data/vendor/cereal/include/cereal/types/common.hpp +129 -0
- data/vendor/cereal/include/cereal/types/complex.hpp +56 -0
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +73 -0
- data/vendor/cereal/include/cereal/types/deque.hpp +62 -0
- data/vendor/cereal/include/cereal/types/forward_list.hpp +68 -0
- data/vendor/cereal/include/cereal/types/functional.hpp +43 -0
- data/vendor/cereal/include/cereal/types/list.hpp +62 -0
- data/vendor/cereal/include/cereal/types/map.hpp +36 -0
- data/vendor/cereal/include/cereal/types/memory.hpp +425 -0
- data/vendor/cereal/include/cereal/types/optional.hpp +66 -0
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +483 -0
- data/vendor/cereal/include/cereal/types/queue.hpp +132 -0
- data/vendor/cereal/include/cereal/types/set.hpp +103 -0
- data/vendor/cereal/include/cereal/types/stack.hpp +76 -0
- data/vendor/cereal/include/cereal/types/string.hpp +61 -0
- data/vendor/cereal/include/cereal/types/tuple.hpp +123 -0
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +36 -0
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +99 -0
- data/vendor/cereal/include/cereal/types/utility.hpp +47 -0
- data/vendor/cereal/include/cereal/types/valarray.hpp +89 -0
- data/vendor/cereal/include/cereal/types/variant.hpp +109 -0
- data/vendor/cereal/include/cereal/types/vector.hpp +112 -0
- data/vendor/cereal/include/cereal/version.hpp +52 -0
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +7 -2
- data/vendor/isotree/src/RcppExports.cpp +44 -4
- data/vendor/isotree/src/Rwrapper.cpp +141 -51
- data/vendor/isotree/src/crit.cpp +1 -1
- data/vendor/isotree/src/dealloc.cpp +1 -1
- data/vendor/isotree/src/dist.cpp +6 -6
- data/vendor/isotree/src/extended.cpp +5 -5
- data/vendor/isotree/src/fit_model.cpp +27 -5
- data/vendor/isotree/src/helpers_iforest.cpp +26 -11
- data/vendor/isotree/src/impute.cpp +7 -7
- data/vendor/isotree/src/isoforest.cpp +7 -7
- data/vendor/isotree/src/isotree.hpp +27 -5
- data/vendor/isotree/src/merge_models.cpp +1 -1
- data/vendor/isotree/src/mult.cpp +1 -1
- data/vendor/isotree/src/predict.cpp +20 -16
- data/vendor/isotree/src/serialize.cpp +1 -1
- data/vendor/isotree/src/sql.cpp +545 -0
- data/vendor/isotree/src/utils.cpp +36 -44
- metadata +102 -81
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95dc93ac1b84a5a37539b335da0457955ee8868997a34a9c249f7c54927f4b04
|
4
|
+
data.tar.gz: eff22a02afce64167248e967d384b0c9b2259f2f5248cfad5bd37acd8bc44e2a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9f410b78af1ae72f4cd166511b6b676f3b71bc39ce92641c455879a3aa88172abb7d1c37ffc953d505fdd59db07d355c9ffba20c3577a1d9b2311c5959a4c87f
|
7
|
+
data.tar.gz: 4804ec4aa11997fb91bcd714a0569e0571970bf06d4ad9b491f0e450c33183c86d37a922577ec296765708e9599272863cfa7f775f7dca4d9d18346ed8a38d87
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,28 @@
|
|
1
|
+
## 0.2.0 (2021-05-17)
|
2
|
+
|
3
|
+
- Updated to Rice 4
|
4
|
+
- Dropped support for Ruby < 2.6
|
5
|
+
|
6
|
+
## 0.1.5 (2021-03-14)
|
7
|
+
|
8
|
+
- Updated Isotree to 0.1.25
|
9
|
+
- Added support for exporting and importing models
|
10
|
+
|
11
|
+
## 0.1.4 (2020-08-22)
|
12
|
+
|
13
|
+
- Added `missing_action`, `new_categ_action`, `categ_split_type`, `coefs`, `depth_imp`, and `weigh_imp_rows` options
|
14
|
+
- Fixed signal handling
|
15
|
+
|
16
|
+
## 0.1.3 (2020-08-13)
|
17
|
+
|
18
|
+
- Added support for categorical data
|
19
|
+
- Added support for Rover data frames
|
20
|
+
- Added `output` option to `predict` method
|
21
|
+
|
22
|
+
## 0.1.2 (2020-08-11)
|
23
|
+
|
24
|
+
- Fixed outlier scores
|
25
|
+
|
1
26
|
## 0.1.1 (2020-08-10)
|
2
27
|
|
3
28
|
- Fixed installation error when cereal not installed
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -4,7 +4,9 @@
|
|
4
4
|
|
5
5
|
Learn how [Isolation Forest](https://www.youtube.com/watch?v=RyFQXQf4w4w) works
|
6
6
|
|
7
|
-
[
|
7
|
+
:deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree) for human-readable explanations of outliers
|
8
|
+
|
9
|
+
[![Build Status](https://github.com/ankane/isotree/workflows/build/badge.svg?branch=master)](https://github.com/ankane/isotree/actions)
|
8
10
|
|
9
11
|
## Installation
|
10
12
|
|
@@ -19,24 +21,40 @@ gem 'isotree'
|
|
19
21
|
Prep your data
|
20
22
|
|
21
23
|
```ruby
|
22
|
-
|
24
|
+
data = [
|
25
|
+
{department: "Books", sale: false, price: 2.50},
|
26
|
+
{department: "Books", sale: true, price: 3.00},
|
27
|
+
{department: "Movies", sale: false, price: 5.00}
|
28
|
+
]
|
23
29
|
```
|
24
30
|
|
25
31
|
Train a model
|
26
32
|
|
27
33
|
```ruby
|
28
34
|
model = IsoTree::IsolationForest.new
|
29
|
-
model.fit(
|
35
|
+
model.fit(data)
|
30
36
|
```
|
31
37
|
|
32
38
|
Get outlier scores
|
33
39
|
|
34
40
|
```ruby
|
35
|
-
model.predict(
|
41
|
+
model.predict(data)
|
36
42
|
```
|
37
43
|
|
38
44
|
Scores are between 0 and 1, with higher scores indicating outliers
|
39
45
|
|
46
|
+
Export the model
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
model.export_model("model.bin")
|
50
|
+
```
|
51
|
+
|
52
|
+
Import a model
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
model = IsoTree::IsolationForest.import_model("model.bin")
|
56
|
+
```
|
57
|
+
|
40
58
|
## Parameters
|
41
59
|
|
42
60
|
Pass parameters - default values below
|
@@ -52,12 +70,18 @@ IsoTree::IsolationForest.new(
|
|
52
70
|
prob_split_avg_gain: 0,
|
53
71
|
prob_split_pooled_gain: 0,
|
54
72
|
min_gain: 0,
|
73
|
+
missing_action: "impute",
|
74
|
+
new_categ_action: "smallest",
|
75
|
+
categ_split_type: "subset",
|
55
76
|
all_perm: false,
|
56
77
|
coef_by_prop: false,
|
57
78
|
sample_with_replacement: false,
|
58
79
|
penalize_range: true,
|
59
80
|
weigh_by_kurtosis: false,
|
81
|
+
coefs: "normal",
|
60
82
|
min_imp_obs: 3,
|
83
|
+
depth_imp: "higher",
|
84
|
+
weigh_imp_rows: "inverse",
|
61
85
|
random_seed: 1,
|
62
86
|
nthreads: -1
|
63
87
|
)
|
@@ -67,10 +91,20 @@ See a [detailed explanation](https://isotree.readthedocs.io/en/latest/#isotree.I
|
|
67
91
|
|
68
92
|
## Data
|
69
93
|
|
70
|
-
Data can be an array of
|
94
|
+
Data can be an array of hashes
|
71
95
|
|
72
96
|
```ruby
|
73
|
-
[
|
97
|
+
[
|
98
|
+
{department: "Books", sale: false, price: 2.50},
|
99
|
+
{department: "Books", sale: true, price: 3.00},
|
100
|
+
{department: "Movies", sale: false, price: 5.00}
|
101
|
+
]
|
102
|
+
```
|
103
|
+
|
104
|
+
Or a Rover data frame
|
105
|
+
|
106
|
+
```ruby
|
107
|
+
Rover.read_csv("data.csv")
|
74
108
|
```
|
75
109
|
|
76
110
|
Or a Numo array
|
@@ -94,6 +128,23 @@ gem uninstall isotree --force
|
|
94
128
|
bundle install
|
95
129
|
```
|
96
130
|
|
131
|
+
## Deployment
|
132
|
+
|
133
|
+
Check out [Trove](https://github.com/ankane/trove) for deploying models.
|
134
|
+
|
135
|
+
```sh
|
136
|
+
trove push model.bin
|
137
|
+
trove push model.bin.metadata
|
138
|
+
```
|
139
|
+
|
140
|
+
## Reference
|
141
|
+
|
142
|
+
Get the average isolation depth
|
143
|
+
|
144
|
+
```ruby
|
145
|
+
model.predict(data, output: "avg_depth")
|
146
|
+
```
|
147
|
+
|
97
148
|
## History
|
98
149
|
|
99
150
|
View the [changelog](https://github.com/ankane/isotree/blob/master/CHANGELOG.md)
|
data/ext/isotree/ext.cpp
CHANGED
@@ -2,20 +2,102 @@
|
|
2
2
|
#include <isotree.hpp>
|
3
3
|
|
4
4
|
// rice
|
5
|
-
#include <rice/
|
6
|
-
#include <rice/Hash.hpp>
|
7
|
-
#include <rice/Module.hpp>
|
8
|
-
#include <rice/String.hpp>
|
9
|
-
#include <rice/Symbol.hpp>
|
5
|
+
#include <rice/rice.hpp>
|
10
6
|
|
11
7
|
using Rice::Array;
|
12
8
|
using Rice::Hash;
|
13
9
|
using Rice::Module;
|
10
|
+
using Rice::Object;
|
14
11
|
using Rice::String;
|
15
12
|
using Rice::Symbol;
|
16
13
|
using Rice::define_class_under;
|
17
14
|
using Rice::define_module;
|
18
15
|
|
16
|
+
namespace Rice::detail
|
17
|
+
{
|
18
|
+
template<>
|
19
|
+
class From_Ruby<NewCategAction>
|
20
|
+
{
|
21
|
+
public:
|
22
|
+
NewCategAction convert(VALUE x)
|
23
|
+
{
|
24
|
+
auto value = Object(x).to_s().str();
|
25
|
+
if (value == "weighted") return Weighted;
|
26
|
+
if (value == "smallest") return Smallest;
|
27
|
+
if (value == "random") return Random;
|
28
|
+
throw std::runtime_error("Unknown new categ action: " + value);
|
29
|
+
}
|
30
|
+
};
|
31
|
+
|
32
|
+
template<>
|
33
|
+
class From_Ruby<MissingAction>
|
34
|
+
{
|
35
|
+
public:
|
36
|
+
MissingAction convert(VALUE x)
|
37
|
+
{
|
38
|
+
auto value = Object(x).to_s().str();
|
39
|
+
if (value == "divide") return Divide;
|
40
|
+
if (value == "impute") return Impute;
|
41
|
+
if (value == "fail") return Fail;
|
42
|
+
throw std::runtime_error("Unknown missing action: " + value);
|
43
|
+
}
|
44
|
+
};
|
45
|
+
|
46
|
+
template<>
|
47
|
+
class From_Ruby<CategSplit>
|
48
|
+
{
|
49
|
+
public:
|
50
|
+
CategSplit convert(VALUE x)
|
51
|
+
{
|
52
|
+
auto value = Object(x).to_s().str();
|
53
|
+
if (value == "subset") return SubSet;
|
54
|
+
if (value == "single_categ") return SingleCateg;
|
55
|
+
throw std::runtime_error("Unknown categ split: " + value);
|
56
|
+
}
|
57
|
+
};
|
58
|
+
|
59
|
+
template<>
|
60
|
+
class From_Ruby<CoefType>
|
61
|
+
{
|
62
|
+
public:
|
63
|
+
CoefType convert(VALUE x)
|
64
|
+
{
|
65
|
+
auto value = Object(x).to_s().str();
|
66
|
+
if (value == "uniform") return Uniform;
|
67
|
+
if (value == "normal") return Normal;
|
68
|
+
throw std::runtime_error("Unknown coef type: " + value);
|
69
|
+
}
|
70
|
+
};
|
71
|
+
|
72
|
+
template<>
|
73
|
+
class From_Ruby<UseDepthImp>
|
74
|
+
{
|
75
|
+
public:
|
76
|
+
UseDepthImp convert(VALUE x)
|
77
|
+
{
|
78
|
+
auto value = Object(x).to_s().str();
|
79
|
+
if (value == "lower") return Lower;
|
80
|
+
if (value == "higher") return Higher;
|
81
|
+
if (value == "same") return Same;
|
82
|
+
throw std::runtime_error("Unknown depth imp: " + value);
|
83
|
+
}
|
84
|
+
};
|
85
|
+
|
86
|
+
template<>
|
87
|
+
class From_Ruby<WeighImpRows>
|
88
|
+
{
|
89
|
+
public:
|
90
|
+
WeighImpRows convert(VALUE x)
|
91
|
+
{
|
92
|
+
auto value = Object(x).to_s().str();
|
93
|
+
if (value == "inverse") return Inverse;
|
94
|
+
if (value == "prop") return Prop;
|
95
|
+
if (value == "flat") return Flat;
|
96
|
+
throw std::runtime_error("Unknown weight imp rows: " + value);
|
97
|
+
}
|
98
|
+
};
|
99
|
+
}
|
100
|
+
|
19
101
|
extern "C"
|
20
102
|
void Init_ext()
|
21
103
|
{
|
@@ -25,44 +107,36 @@ void Init_ext()
|
|
25
107
|
define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
|
26
108
|
|
27
109
|
rb_mExt
|
28
|
-
.
|
110
|
+
.define_singleton_function(
|
29
111
|
"fit_iforest",
|
30
|
-
|
112
|
+
[](Hash options) {
|
31
113
|
// model
|
32
114
|
ExtIsoForest iso;
|
33
115
|
|
34
116
|
// data
|
35
117
|
size_t nrows = options.get<size_t, Symbol>("nrows");
|
36
|
-
size_t
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
118
|
+
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
119
|
+
size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
120
|
+
|
121
|
+
double *restrict numeric_data = NULL;
|
122
|
+
if (ncols_numeric > 0) {
|
123
|
+
numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
|
124
|
+
}
|
125
|
+
|
126
|
+
int *restrict categorical_data = NULL;
|
127
|
+
int *restrict ncat = NULL;
|
128
|
+
if (ncols_categ > 0) {
|
129
|
+
categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
|
130
|
+
ncat = (int*) options.get<String, Symbol>("ncat").c_str();
|
131
|
+
}
|
132
|
+
|
133
|
+
// not used (sparse matrices)
|
42
134
|
double* Xc = NULL;
|
43
135
|
sparse_ix* Xc_ind = NULL;
|
44
136
|
sparse_ix* Xc_indptr = NULL;
|
45
137
|
|
46
138
|
// options
|
47
|
-
|
48
|
-
double* sample_weights = NULL;
|
49
|
-
bool weight_as_sample = false;
|
50
|
-
size_t max_depth = 0;
|
51
|
-
bool limit_depth = true;
|
52
|
-
bool standardize_dist = false;
|
53
|
-
double* tmat = NULL;
|
54
|
-
double* output_depths = NULL;
|
55
|
-
bool standardize_depth = false;
|
56
|
-
double* col_weights = NULL;
|
57
|
-
MissingAction missing_action = Impute;
|
58
|
-
CategSplit cat_split_type = SubSet;
|
59
|
-
NewCategAction new_cat_action = Smallest;
|
60
|
-
Imputer *imputer = NULL;
|
61
|
-
UseDepthImp depth_imp = Higher;
|
62
|
-
WeighImpRows weigh_imp_rows = Inverse;
|
63
|
-
bool impute_at_fit = false;
|
64
|
-
|
65
|
-
// Rice has limit of 14 arguments, so use hash for options
|
139
|
+
// Rice has limit of 14 arguments, so use hash
|
66
140
|
size_t sample_size = options.get<size_t, Symbol>("sample_size");
|
67
141
|
size_t ndim = options.get<size_t, Symbol>("ndim");
|
68
142
|
size_t ntrees = options.get<size_t, Symbol>("ntrees");
|
@@ -72,21 +146,41 @@ void Init_ext()
|
|
72
146
|
double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
|
73
147
|
double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
|
74
148
|
double min_gain = options.get<double, Symbol>("min_gain");
|
149
|
+
MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
|
150
|
+
CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
|
151
|
+
NewCategAction new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
|
75
152
|
bool all_perm = options.get<bool, Symbol>("all_perm");
|
76
153
|
bool coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
|
77
154
|
bool with_replacement = options.get<bool, Symbol>("sample_with_replacement");
|
78
155
|
bool penalize_range = options.get<bool, Symbol>("penalize_range");
|
79
156
|
bool weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
|
157
|
+
CoefType coef_type = options.get<CoefType, Symbol>("coefs");
|
80
158
|
size_t min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
|
159
|
+
UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
|
160
|
+
WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
|
81
161
|
uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
|
82
162
|
int nthreads = options.get<int, Symbol>("nthreads");
|
83
163
|
|
164
|
+
// TODO options
|
165
|
+
double* sample_weights = NULL;
|
166
|
+
bool weight_as_sample = false;
|
167
|
+
size_t max_depth = 0;
|
168
|
+
bool limit_depth = true;
|
169
|
+
bool standardize_dist = false;
|
170
|
+
double* tmat = NULL;
|
171
|
+
double* output_depths = NULL;
|
172
|
+
bool standardize_depth = false;
|
173
|
+
double* col_weights = NULL;
|
174
|
+
Imputer *imputer = NULL;
|
175
|
+
bool impute_at_fit = false;
|
176
|
+
bool handle_interrupt = false;
|
177
|
+
|
84
178
|
fit_iforest(
|
85
179
|
NULL,
|
86
180
|
&iso,
|
87
181
|
numeric_data,
|
88
182
|
ncols_numeric,
|
89
|
-
|
183
|
+
categorical_data,
|
90
184
|
ncols_categ,
|
91
185
|
ncat,
|
92
186
|
Xc,
|
@@ -126,18 +220,31 @@ void Init_ext()
|
|
126
220
|
weigh_imp_rows,
|
127
221
|
impute_at_fit,
|
128
222
|
random_seed,
|
223
|
+
handle_interrupt,
|
129
224
|
nthreads
|
130
225
|
);
|
131
226
|
|
132
227
|
return iso;
|
133
228
|
})
|
134
|
-
.
|
229
|
+
.define_singleton_function(
|
135
230
|
"predict_iforest",
|
136
|
-
|
231
|
+
[](ExtIsoForest& iso, Hash options) {
|
137
232
|
// data
|
138
233
|
size_t nrows = options.get<size_t, Symbol>("nrows");
|
139
|
-
|
140
|
-
|
234
|
+
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
235
|
+
size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
236
|
+
|
237
|
+
double *restrict numeric_data = NULL;
|
238
|
+
if (ncols_numeric > 0) {
|
239
|
+
numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
|
240
|
+
}
|
241
|
+
|
242
|
+
int *restrict categorical_data = NULL;
|
243
|
+
if (ncols_categ > 0) {
|
244
|
+
categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
|
245
|
+
}
|
246
|
+
|
247
|
+
// not used (sparse matrices)
|
141
248
|
double* Xc = NULL;
|
142
249
|
sparse_ix* Xc_ind = NULL;
|
143
250
|
sparse_ix* Xc_indptr = NULL;
|
@@ -147,13 +254,13 @@ void Init_ext()
|
|
147
254
|
|
148
255
|
// options
|
149
256
|
int nthreads = options.get<int, Symbol>("nthreads");
|
150
|
-
bool standardize =
|
257
|
+
bool standardize = options.get<bool, Symbol>("standardize");
|
151
258
|
std::vector<double> outlier_scores(nrows);
|
152
259
|
sparse_ix* tree_num = NULL;
|
153
260
|
|
154
261
|
predict_iforest(
|
155
262
|
numeric_data,
|
156
|
-
|
263
|
+
categorical_data,
|
157
264
|
Xc,
|
158
265
|
Xc_ind,
|
159
266
|
Xc_indptr,
|
@@ -174,5 +281,29 @@ void Init_ext()
|
|
174
281
|
ret.push(outlier_scores[i]);
|
175
282
|
}
|
176
283
|
return ret;
|
284
|
+
})
|
285
|
+
.define_singleton_function(
|
286
|
+
"serialize_ext_isoforest",
|
287
|
+
[](ExtIsoForest& iso, String path) {
|
288
|
+
#ifdef _MSC_VER
|
289
|
+
// TODO convert to wchar_t
|
290
|
+
throw std::runtime_error("Not supported on Windows yet");
|
291
|
+
#else
|
292
|
+
serialize_ext_isoforest(iso, path.c_str());
|
293
|
+
#endif
|
294
|
+
})
|
295
|
+
.define_singleton_function(
|
296
|
+
"deserialize_ext_isoforest",
|
297
|
+
[](String path) {
|
298
|
+
ExtIsoForest iso;
|
299
|
+
|
300
|
+
#ifdef _MSC_VER
|
301
|
+
// TODO convert to wchar_t
|
302
|
+
throw std::runtime_error("Not supported on Windows yet");
|
303
|
+
#else
|
304
|
+
deserialize_ext_isoforest(iso, path.c_str());
|
305
|
+
#endif
|
306
|
+
|
307
|
+
return iso;
|
177
308
|
});
|
178
309
|
}
|