isotree 0.1.0 → 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -1
- data/LICENSE.txt +2 -1
- data/README.md +64 -6
- data/ext/isotree/ext.cpp +139 -30
- data/ext/isotree/extconf.rb +2 -1
- data/lib/isotree.rb +2 -0
- data/lib/isotree/dataset.rb +73 -0
- data/lib/isotree/isolation_forest.rb +182 -29
- data/lib/isotree/version.rb +1 -1
- data/vendor/cereal/LICENSE +24 -0
- data/vendor/cereal/README.md +85 -0
- data/vendor/cereal/include/cereal/access.hpp +351 -0
- data/vendor/cereal/include/cereal/archives/adapters.hpp +163 -0
- data/vendor/cereal/include/cereal/archives/binary.hpp +169 -0
- data/vendor/cereal/include/cereal/archives/json.hpp +1019 -0
- data/vendor/cereal/include/cereal/archives/portable_binary.hpp +334 -0
- data/vendor/cereal/include/cereal/archives/xml.hpp +956 -0
- data/vendor/cereal/include/cereal/cereal.hpp +1089 -0
- data/vendor/cereal/include/cereal/details/helpers.hpp +422 -0
- data/vendor/cereal/include/cereal/details/polymorphic_impl.hpp +796 -0
- data/vendor/cereal/include/cereal/details/polymorphic_impl_fwd.hpp +65 -0
- data/vendor/cereal/include/cereal/details/static_object.hpp +127 -0
- data/vendor/cereal/include/cereal/details/traits.hpp +1411 -0
- data/vendor/cereal/include/cereal/details/util.hpp +84 -0
- data/vendor/cereal/include/cereal/external/base64.hpp +134 -0
- data/vendor/cereal/include/cereal/external/rapidjson/allocators.h +284 -0
- data/vendor/cereal/include/cereal/external/rapidjson/cursorstreamwrapper.h +78 -0
- data/vendor/cereal/include/cereal/external/rapidjson/document.h +2652 -0
- data/vendor/cereal/include/cereal/external/rapidjson/encodedstream.h +299 -0
- data/vendor/cereal/include/cereal/external/rapidjson/encodings.h +716 -0
- data/vendor/cereal/include/cereal/external/rapidjson/error/en.h +74 -0
- data/vendor/cereal/include/cereal/external/rapidjson/error/error.h +161 -0
- data/vendor/cereal/include/cereal/external/rapidjson/filereadstream.h +99 -0
- data/vendor/cereal/include/cereal/external/rapidjson/filewritestream.h +104 -0
- data/vendor/cereal/include/cereal/external/rapidjson/fwd.h +151 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/biginteger.h +290 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/diyfp.h +271 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/dtoa.h +245 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/ieee754.h +78 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/itoa.h +308 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/meta.h +186 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/pow10.h +55 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/regex.h +740 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/stack.h +232 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strfunc.h +69 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/strtod.h +290 -0
- data/vendor/cereal/include/cereal/external/rapidjson/internal/swap.h +46 -0
- data/vendor/cereal/include/cereal/external/rapidjson/istreamwrapper.h +128 -0
- data/vendor/cereal/include/cereal/external/rapidjson/memorybuffer.h +70 -0
- data/vendor/cereal/include/cereal/external/rapidjson/memorystream.h +71 -0
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/inttypes.h +316 -0
- data/vendor/cereal/include/cereal/external/rapidjson/msinttypes/stdint.h +300 -0
- data/vendor/cereal/include/cereal/external/rapidjson/ostreamwrapper.h +81 -0
- data/vendor/cereal/include/cereal/external/rapidjson/pointer.h +1414 -0
- data/vendor/cereal/include/cereal/external/rapidjson/prettywriter.h +277 -0
- data/vendor/cereal/include/cereal/external/rapidjson/rapidjson.h +656 -0
- data/vendor/cereal/include/cereal/external/rapidjson/reader.h +2230 -0
- data/vendor/cereal/include/cereal/external/rapidjson/schema.h +2497 -0
- data/vendor/cereal/include/cereal/external/rapidjson/stream.h +223 -0
- data/vendor/cereal/include/cereal/external/rapidjson/stringbuffer.h +121 -0
- data/vendor/cereal/include/cereal/external/rapidjson/writer.h +709 -0
- data/vendor/cereal/include/cereal/external/rapidxml/license.txt +52 -0
- data/vendor/cereal/include/cereal/external/rapidxml/manual.html +406 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml.hpp +2624 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_iterators.hpp +175 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_print.hpp +428 -0
- data/vendor/cereal/include/cereal/external/rapidxml/rapidxml_utils.hpp +123 -0
- data/vendor/cereal/include/cereal/macros.hpp +154 -0
- data/vendor/cereal/include/cereal/specialize.hpp +139 -0
- data/vendor/cereal/include/cereal/types/array.hpp +79 -0
- data/vendor/cereal/include/cereal/types/atomic.hpp +55 -0
- data/vendor/cereal/include/cereal/types/base_class.hpp +203 -0
- data/vendor/cereal/include/cereal/types/bitset.hpp +176 -0
- data/vendor/cereal/include/cereal/types/boost_variant.hpp +164 -0
- data/vendor/cereal/include/cereal/types/chrono.hpp +72 -0
- data/vendor/cereal/include/cereal/types/common.hpp +129 -0
- data/vendor/cereal/include/cereal/types/complex.hpp +56 -0
- data/vendor/cereal/include/cereal/types/concepts/pair_associative_container.hpp +73 -0
- data/vendor/cereal/include/cereal/types/deque.hpp +62 -0
- data/vendor/cereal/include/cereal/types/forward_list.hpp +68 -0
- data/vendor/cereal/include/cereal/types/functional.hpp +43 -0
- data/vendor/cereal/include/cereal/types/list.hpp +62 -0
- data/vendor/cereal/include/cereal/types/map.hpp +36 -0
- data/vendor/cereal/include/cereal/types/memory.hpp +425 -0
- data/vendor/cereal/include/cereal/types/optional.hpp +66 -0
- data/vendor/cereal/include/cereal/types/polymorphic.hpp +483 -0
- data/vendor/cereal/include/cereal/types/queue.hpp +132 -0
- data/vendor/cereal/include/cereal/types/set.hpp +103 -0
- data/vendor/cereal/include/cereal/types/stack.hpp +76 -0
- data/vendor/cereal/include/cereal/types/string.hpp +61 -0
- data/vendor/cereal/include/cereal/types/tuple.hpp +123 -0
- data/vendor/cereal/include/cereal/types/unordered_map.hpp +36 -0
- data/vendor/cereal/include/cereal/types/unordered_set.hpp +99 -0
- data/vendor/cereal/include/cereal/types/utility.hpp +47 -0
- data/vendor/cereal/include/cereal/types/valarray.hpp +89 -0
- data/vendor/cereal/include/cereal/types/variant.hpp +109 -0
- data/vendor/cereal/include/cereal/types/vector.hpp +112 -0
- data/vendor/cereal/include/cereal/version.hpp +52 -0
- data/vendor/isotree/LICENSE +1 -1
- data/vendor/isotree/README.md +7 -2
- data/vendor/isotree/src/RcppExports.cpp +44 -4
- data/vendor/isotree/src/Rwrapper.cpp +141 -51
- data/vendor/isotree/src/crit.cpp +1 -1
- data/vendor/isotree/src/dealloc.cpp +1 -1
- data/vendor/isotree/src/dist.cpp +6 -6
- data/vendor/isotree/src/extended.cpp +5 -5
- data/vendor/isotree/src/fit_model.cpp +27 -5
- data/vendor/isotree/src/helpers_iforest.cpp +26 -11
- data/vendor/isotree/src/impute.cpp +7 -7
- data/vendor/isotree/src/isoforest.cpp +7 -7
- data/vendor/isotree/src/isotree.hpp +27 -5
- data/vendor/isotree/src/merge_models.cpp +1 -1
- data/vendor/isotree/src/mult.cpp +1 -1
- data/vendor/isotree/src/predict.cpp +20 -16
- data/vendor/isotree/src/serialize.cpp +1 -1
- data/vendor/isotree/src/sql.cpp +545 -0
- data/vendor/isotree/src/utils.cpp +36 -44
- metadata +99 -78
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5974c65c4adc4fd79ee7770f324c906a2788534f3fe6f381d61711e7cdce78c
|
4
|
+
data.tar.gz: d401e7c5aaabcd5dcffd5e4f54d93ea4d0966698854e70d3475fd79a92e1e241
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d00de0c3902b4f7fd3e13e08f0738ec4daf8be173f60cf46f49318e76f5be3c4ef3edd925f83074e03b4fe181aa6384d43c749f5f72247cf08d44f6811a6fe80
|
7
|
+
data.tar.gz: 70195ae442c2e4762b2f900a82a891d8f991ace1e521d625f910a1b5e3482334a299674c255e1aebf855728ee169ab5886428baf6df9a5e436bc73f3aff6dda6
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,27 @@
|
|
1
|
-
## 0.1.
|
1
|
+
## 0.1.5 (2021-03-14)
|
2
|
+
|
3
|
+
- Updated Isotree to 0.1.25
|
4
|
+
- Added support for exporting and importing models
|
5
|
+
|
6
|
+
## 0.1.4 (2020-08-22)
|
7
|
+
|
8
|
+
- Added `missing_action`, `new_categ_action`, `categ_split_type`, `coefs`, `depth_imp`, and `weigh_imp_rows` options
|
9
|
+
- Fixed signal handling
|
10
|
+
|
11
|
+
## 0.1.3 (2020-08-13)
|
12
|
+
|
13
|
+
- Added support for categorical data
|
14
|
+
- Added support for Rover data frames
|
15
|
+
- Added `output` option to `predict` method
|
16
|
+
|
17
|
+
## 0.1.2 (2020-08-11)
|
18
|
+
|
19
|
+
- Fixed outlier scores
|
20
|
+
|
21
|
+
## 0.1.1 (2020-08-10)
|
22
|
+
|
23
|
+
- Fixed installation error when cereal not installed
|
24
|
+
|
25
|
+
## 0.1.0 (2020-08-10)
|
2
26
|
|
3
27
|
- First release
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
# IsoTree
|
2
2
|
|
3
|
-
:evergreen_tree: [IsoTree](https://github.com/david-cortes/isotree) - outlier/anomaly detection
|
3
|
+
:evergreen_tree: [IsoTree](https://github.com/david-cortes/isotree) - outlier/anomaly detection using Isolation Forest - for Ruby
|
4
4
|
|
5
5
|
Learn how [Isolation Forest](https://www.youtube.com/watch?v=RyFQXQf4w4w) works
|
6
6
|
|
7
|
+
:deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree) for human-readable explanations of outliers
|
8
|
+
|
9
|
+
[![Build Status](https://github.com/ankane/isotree/workflows/build/badge.svg?branch=master)](https://github.com/ankane/isotree/actions)
|
10
|
+
|
7
11
|
## Installation
|
8
12
|
|
9
13
|
Add this line to your application’s Gemfile:
|
@@ -17,24 +21,40 @@ gem 'isotree'
|
|
17
21
|
Prep your data
|
18
22
|
|
19
23
|
```ruby
|
20
|
-
|
24
|
+
data = [
|
25
|
+
{department: "Books", sale: false, price: 2.50},
|
26
|
+
{department: "Books", sale: true, price: 3.00},
|
27
|
+
{department: "Movies", sale: false, price: 5.00}
|
28
|
+
]
|
21
29
|
```
|
22
30
|
|
23
31
|
Train a model
|
24
32
|
|
25
33
|
```ruby
|
26
34
|
model = IsoTree::IsolationForest.new
|
27
|
-
model.fit(
|
35
|
+
model.fit(data)
|
28
36
|
```
|
29
37
|
|
30
38
|
Get outlier scores
|
31
39
|
|
32
40
|
```ruby
|
33
|
-
model.predict(
|
41
|
+
model.predict(data)
|
34
42
|
```
|
35
43
|
|
36
44
|
Scores are between 0 and 1, with higher scores indicating outliers
|
37
45
|
|
46
|
+
Export the model
|
47
|
+
|
48
|
+
```ruby
|
49
|
+
model.export_model("model.bin")
|
50
|
+
```
|
51
|
+
|
52
|
+
Import a model
|
53
|
+
|
54
|
+
```ruby
|
55
|
+
model = IsoTree::IsolationForest.import_model("model.bin")
|
56
|
+
```
|
57
|
+
|
38
58
|
## Parameters
|
39
59
|
|
40
60
|
Pass parameters - default values below
|
@@ -50,12 +70,18 @@ IsoTree::IsolationForest.new(
|
|
50
70
|
prob_split_avg_gain: 0,
|
51
71
|
prob_split_pooled_gain: 0,
|
52
72
|
min_gain: 0,
|
73
|
+
missing_action: "impute",
|
74
|
+
new_categ_action: "smallest",
|
75
|
+
categ_split_type: "subset",
|
53
76
|
all_perm: false,
|
54
77
|
coef_by_prop: false,
|
55
78
|
sample_with_replacement: false,
|
56
79
|
penalize_range: true,
|
57
80
|
weigh_by_kurtosis: false,
|
81
|
+
coefs: "normal",
|
58
82
|
min_imp_obs: 3,
|
83
|
+
depth_imp: "higher",
|
84
|
+
weigh_imp_rows: "inverse",
|
59
85
|
random_seed: 1,
|
60
86
|
nthreads: -1
|
61
87
|
)
|
@@ -65,10 +91,20 @@ See a [detailed explanation](https://isotree.readthedocs.io/en/latest/#isotree.I
|
|
65
91
|
|
66
92
|
## Data
|
67
93
|
|
68
|
-
Data can be an array of
|
94
|
+
Data can be an array of hashes
|
95
|
+
|
96
|
+
```ruby
|
97
|
+
[
|
98
|
+
{department: "Books", sale: false, price: 2.50},
|
99
|
+
{department: "Books", sale: true, price: 3.00},
|
100
|
+
{department: "Movies", sale: false, price: 5.00}
|
101
|
+
]
|
102
|
+
```
|
103
|
+
|
104
|
+
Or a Rover data frame
|
69
105
|
|
70
106
|
```ruby
|
71
|
-
|
107
|
+
Rover.read_csv("data.csv")
|
72
108
|
```
|
73
109
|
|
74
110
|
Or a Numo array
|
@@ -87,6 +123,28 @@ brew install libomp
|
|
87
123
|
|
88
124
|
Then reinstall the gem.
|
89
125
|
|
126
|
+
```sh
|
127
|
+
gem uninstall isotree --force
|
128
|
+
bundle install
|
129
|
+
```
|
130
|
+
|
131
|
+
## Deployment
|
132
|
+
|
133
|
+
Check out [Trove](https://github.com/ankane/trove) for deploying models.
|
134
|
+
|
135
|
+
```sh
|
136
|
+
trove push model.bin
|
137
|
+
trove push model.bin.metadata
|
138
|
+
```
|
139
|
+
|
140
|
+
## Reference
|
141
|
+
|
142
|
+
Get the average isolation depth
|
143
|
+
|
144
|
+
```ruby
|
145
|
+
model.predict(data, output: "avg_depth")
|
146
|
+
```
|
147
|
+
|
90
148
|
## History
|
91
149
|
|
92
150
|
View the [changelog](https://github.com/ankane/isotree/blob/master/CHANGELOG.md)
|
data/ext/isotree/ext.cpp
CHANGED
@@ -5,17 +5,77 @@
|
|
5
5
|
#include <rice/Array.hpp>
|
6
6
|
#include <rice/Hash.hpp>
|
7
7
|
#include <rice/Module.hpp>
|
8
|
+
#include <rice/Object.hpp>
|
8
9
|
#include <rice/String.hpp>
|
9
10
|
#include <rice/Symbol.hpp>
|
10
11
|
|
11
12
|
using Rice::Array;
|
12
13
|
using Rice::Hash;
|
13
14
|
using Rice::Module;
|
15
|
+
using Rice::Object;
|
14
16
|
using Rice::String;
|
15
17
|
using Rice::Symbol;
|
16
18
|
using Rice::define_class_under;
|
17
19
|
using Rice::define_module;
|
18
20
|
|
21
|
+
template<>
|
22
|
+
NewCategAction from_ruby<NewCategAction>(Object x)
|
23
|
+
{
|
24
|
+
auto value = x.to_s().str();
|
25
|
+
if (value == "weighted") return Weighted;
|
26
|
+
if (value == "smallest") return Smallest;
|
27
|
+
if (value == "random") return Random;
|
28
|
+
throw std::runtime_error("Unknown new categ action: " + value);
|
29
|
+
}
|
30
|
+
|
31
|
+
template<>
|
32
|
+
MissingAction from_ruby<MissingAction>(Object x)
|
33
|
+
{
|
34
|
+
auto value = x.to_s().str();
|
35
|
+
if (value == "divide") return Divide;
|
36
|
+
if (value == "impute") return Impute;
|
37
|
+
if (value == "fail") return Fail;
|
38
|
+
throw std::runtime_error("Unknown missing action: " + value);
|
39
|
+
}
|
40
|
+
|
41
|
+
template<>
|
42
|
+
CategSplit from_ruby<CategSplit>(Object x)
|
43
|
+
{
|
44
|
+
auto value = x.to_s().str();
|
45
|
+
if (value == "subset") return SubSet;
|
46
|
+
if (value == "single_categ") return SingleCateg;
|
47
|
+
throw std::runtime_error("Unknown categ split: " + value);
|
48
|
+
}
|
49
|
+
|
50
|
+
template<>
|
51
|
+
CoefType from_ruby<CoefType>(Object x)
|
52
|
+
{
|
53
|
+
auto value = x.to_s().str();
|
54
|
+
if (value == "uniform") return Uniform;
|
55
|
+
if (value == "normal") return Normal;
|
56
|
+
throw std::runtime_error("Unknown coef type: " + value);
|
57
|
+
}
|
58
|
+
|
59
|
+
template<>
|
60
|
+
UseDepthImp from_ruby<UseDepthImp>(Object x)
|
61
|
+
{
|
62
|
+
auto value = x.to_s().str();
|
63
|
+
if (value == "lower") return Lower;
|
64
|
+
if (value == "higher") return Higher;
|
65
|
+
if (value == "same") return Same;
|
66
|
+
throw std::runtime_error("Unknown depth imp: " + value);
|
67
|
+
}
|
68
|
+
|
69
|
+
template<>
|
70
|
+
WeighImpRows from_ruby<WeighImpRows>(Object x)
|
71
|
+
{
|
72
|
+
auto value = x.to_s().str();
|
73
|
+
if (value == "inverse") return Inverse;
|
74
|
+
if (value == "prop") return Prop;
|
75
|
+
if (value == "flat") return Flat;
|
76
|
+
throw std::runtime_error("Unknown weight imp rows: " + value);
|
77
|
+
}
|
78
|
+
|
19
79
|
extern "C"
|
20
80
|
void Init_ext()
|
21
81
|
{
|
@@ -33,36 +93,28 @@ void Init_ext()
|
|
33
93
|
|
34
94
|
// data
|
35
95
|
size_t nrows = options.get<size_t, Symbol>("nrows");
|
36
|
-
size_t
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
96
|
+
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
97
|
+
size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
98
|
+
|
99
|
+
double *restrict numeric_data = NULL;
|
100
|
+
if (ncols_numeric > 0) {
|
101
|
+
numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
|
102
|
+
}
|
103
|
+
|
104
|
+
int *restrict categorical_data = NULL;
|
105
|
+
int *restrict ncat = NULL;
|
106
|
+
if (ncols_categ > 0) {
|
107
|
+
categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
|
108
|
+
ncat = (int*) options.get<String, Symbol>("ncat").c_str();
|
109
|
+
}
|
110
|
+
|
111
|
+
// not used (sparse matrices)
|
42
112
|
double* Xc = NULL;
|
43
113
|
sparse_ix* Xc_ind = NULL;
|
44
114
|
sparse_ix* Xc_indptr = NULL;
|
45
115
|
|
46
116
|
// options
|
47
|
-
|
48
|
-
double* sample_weights = NULL;
|
49
|
-
bool weight_as_sample = false;
|
50
|
-
size_t max_depth = 0;
|
51
|
-
bool limit_depth = true;
|
52
|
-
bool standardize_dist = false;
|
53
|
-
double* tmat = NULL;
|
54
|
-
double* output_depths = NULL;
|
55
|
-
bool standardize_depth = false;
|
56
|
-
double* col_weights = NULL;
|
57
|
-
MissingAction missing_action = Impute;
|
58
|
-
CategSplit cat_split_type = SubSet;
|
59
|
-
NewCategAction new_cat_action = Smallest;
|
60
|
-
Imputer *imputer = NULL;
|
61
|
-
UseDepthImp depth_imp = Higher;
|
62
|
-
WeighImpRows weigh_imp_rows = Inverse;
|
63
|
-
bool impute_at_fit = false;
|
64
|
-
|
65
|
-
// Rice has limit of 14 arguments, so use hash for options
|
117
|
+
// Rice has limit of 14 arguments, so use hash
|
66
118
|
size_t sample_size = options.get<size_t, Symbol>("sample_size");
|
67
119
|
size_t ndim = options.get<size_t, Symbol>("ndim");
|
68
120
|
size_t ntrees = options.get<size_t, Symbol>("ntrees");
|
@@ -72,21 +124,41 @@ void Init_ext()
|
|
72
124
|
double prob_pick_by_gain_pl = options.get<double, Symbol>("prob_pick_pooled_gain");
|
73
125
|
double prob_split_by_gain_pl = options.get<double, Symbol>("prob_split_pooled_gain");
|
74
126
|
double min_gain = options.get<double, Symbol>("min_gain");
|
127
|
+
MissingAction missing_action = options.get<MissingAction, Symbol>("missing_action");
|
128
|
+
CategSplit cat_split_type = options.get<CategSplit, Symbol>("categ_split_type");
|
129
|
+
NewCategAction new_cat_action = options.get<NewCategAction, Symbol>("new_categ_action");
|
75
130
|
bool all_perm = options.get<bool, Symbol>("all_perm");
|
76
131
|
bool coef_by_prop = options.get<bool, Symbol>("coef_by_prop");
|
77
132
|
bool with_replacement = options.get<bool, Symbol>("sample_with_replacement");
|
78
133
|
bool penalize_range = options.get<bool, Symbol>("penalize_range");
|
79
134
|
bool weigh_by_kurt = options.get<bool, Symbol>("weigh_by_kurtosis");
|
135
|
+
CoefType coef_type = options.get<CoefType, Symbol>("coefs");
|
80
136
|
size_t min_imp_obs = options.get<size_t, Symbol>("min_imp_obs");
|
137
|
+
UseDepthImp depth_imp = options.get<UseDepthImp, Symbol>("depth_imp");
|
138
|
+
WeighImpRows weigh_imp_rows = options.get<WeighImpRows, Symbol>("weigh_imp_rows");
|
81
139
|
uint64_t random_seed = options.get<uint64_t, Symbol>("random_seed");
|
82
140
|
int nthreads = options.get<int, Symbol>("nthreads");
|
83
141
|
|
142
|
+
// TODO options
|
143
|
+
double* sample_weights = NULL;
|
144
|
+
bool weight_as_sample = false;
|
145
|
+
size_t max_depth = 0;
|
146
|
+
bool limit_depth = true;
|
147
|
+
bool standardize_dist = false;
|
148
|
+
double* tmat = NULL;
|
149
|
+
double* output_depths = NULL;
|
150
|
+
bool standardize_depth = false;
|
151
|
+
double* col_weights = NULL;
|
152
|
+
Imputer *imputer = NULL;
|
153
|
+
bool impute_at_fit = false;
|
154
|
+
bool handle_interrupt = false;
|
155
|
+
|
84
156
|
fit_iforest(
|
85
157
|
NULL,
|
86
158
|
&iso,
|
87
159
|
numeric_data,
|
88
160
|
ncols_numeric,
|
89
|
-
|
161
|
+
categorical_data,
|
90
162
|
ncols_categ,
|
91
163
|
ncat,
|
92
164
|
Xc,
|
@@ -126,6 +198,7 @@ void Init_ext()
|
|
126
198
|
weigh_imp_rows,
|
127
199
|
impute_at_fit,
|
128
200
|
random_seed,
|
201
|
+
handle_interrupt,
|
129
202
|
nthreads
|
130
203
|
);
|
131
204
|
|
@@ -136,8 +209,20 @@ void Init_ext()
|
|
136
209
|
*[](ExtIsoForest& iso, Hash options) {
|
137
210
|
// data
|
138
211
|
size_t nrows = options.get<size_t, Symbol>("nrows");
|
139
|
-
|
140
|
-
|
212
|
+
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
213
|
+
size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
|
214
|
+
|
215
|
+
double *restrict numeric_data = NULL;
|
216
|
+
if (ncols_numeric > 0) {
|
217
|
+
numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
|
218
|
+
}
|
219
|
+
|
220
|
+
int *restrict categorical_data = NULL;
|
221
|
+
if (ncols_categ > 0) {
|
222
|
+
categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
|
223
|
+
}
|
224
|
+
|
225
|
+
// not used (sparse matrices)
|
141
226
|
double* Xc = NULL;
|
142
227
|
sparse_ix* Xc_ind = NULL;
|
143
228
|
sparse_ix* Xc_indptr = NULL;
|
@@ -147,13 +232,13 @@ void Init_ext()
|
|
147
232
|
|
148
233
|
// options
|
149
234
|
int nthreads = options.get<int, Symbol>("nthreads");
|
150
|
-
bool standardize =
|
235
|
+
bool standardize = options.get<bool, Symbol>("standardize");
|
151
236
|
std::vector<double> outlier_scores(nrows);
|
152
237
|
sparse_ix* tree_num = NULL;
|
153
238
|
|
154
239
|
predict_iforest(
|
155
240
|
numeric_data,
|
156
|
-
|
241
|
+
categorical_data,
|
157
242
|
Xc,
|
158
243
|
Xc_ind,
|
159
244
|
Xc_indptr,
|
@@ -174,5 +259,29 @@ void Init_ext()
|
|
174
259
|
ret.push(outlier_scores[i]);
|
175
260
|
}
|
176
261
|
return ret;
|
262
|
+
})
|
263
|
+
.define_singleton_method(
|
264
|
+
"serialize_ext_isoforest",
|
265
|
+
*[](ExtIsoForest& iso, String path) {
|
266
|
+
#ifdef _MSC_VER
|
267
|
+
// TODO convert to wchar_t
|
268
|
+
throw std::runtime_error("Not supported on Windows yet");
|
269
|
+
#else
|
270
|
+
serialize_ext_isoforest(iso, path.c_str());
|
271
|
+
#endif
|
272
|
+
})
|
273
|
+
.define_singleton_method(
|
274
|
+
"deserialize_ext_isoforest",
|
275
|
+
*[](String path) {
|
276
|
+
ExtIsoForest iso;
|
277
|
+
|
278
|
+
#ifdef _MSC_VER
|
279
|
+
// TODO convert to wchar_t
|
280
|
+
throw std::runtime_error("Not supported on Windows yet");
|
281
|
+
#else
|
282
|
+
deserialize_ext_isoforest(iso, path.c_str());
|
283
|
+
#endif
|
284
|
+
|
285
|
+
return iso;
|
177
286
|
});
|
178
287
|
}
|
data/ext/isotree/extconf.rb
CHANGED
@@ -12,10 +12,11 @@ end
|
|
12
12
|
|
13
13
|
ext = File.expand_path(".", __dir__)
|
14
14
|
isotree = File.expand_path("../../vendor/isotree/src", __dir__)
|
15
|
+
cereal = File.expand_path("../../vendor/cereal/include", __dir__)
|
15
16
|
|
16
17
|
exclude = %w(Rwrapper.cpp RcppExports.cpp)
|
17
18
|
$srcs = Dir["{#{ext},#{isotree}}/*.{cc,cpp}"].reject { |f| exclude.include?(File.basename(f)) }
|
18
|
-
$INCFLAGS << " -I#{isotree}"
|
19
|
+
$INCFLAGS << " -I#{isotree} -I#{cereal}"
|
19
20
|
$VPATH << isotree
|
20
21
|
|
21
22
|
create_makefile("isotree/ext")
|