isotree 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +0 -6
- data/ext/isotree/ext.cpp +25 -42
- data/lib/isotree/isolation_forest.rb +33 -13
- data/lib/isotree/version.rb +1 -1
- metadata +5 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: e8752a98461516e8bffba379295408c157c7bc4b6764364c8246b6138d8080c4
|
|
4
|
+
data.tar.gz: 9981293449bbf7b46b6129b6c812164a986dd9af7f8cb0e549fa89080ed7da7e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a65fe9959da2ccd0489a377eecfcd2de535f094f23b2a363ad4768767f62e10af236a7c2b5163c7a37926192ce720c15034695aec4b3adc4868e9c4b89be1a8d
|
|
7
|
+
data.tar.gz: f878d2ae9bb5e189e07be86d13aa5efeb2c0cab76857e728eadcb14d485becf62cc5d8526f4e1064f99fe3c0dea5404e61af8c9a51e7780a347966edfd9d5a7c
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -157,12 +157,6 @@ Get the average isolation depth
|
|
|
157
157
|
model.predict(data, output: "avg_depth")
|
|
158
158
|
```
|
|
159
159
|
|
|
160
|
-
## Upgrading
|
|
161
|
-
|
|
162
|
-
### 0.3.0
|
|
163
|
-
|
|
164
|
-
This version uses IsoTree’s new serialization format. Exported models must be recreated.
|
|
165
|
-
|
|
166
160
|
## History
|
|
167
161
|
|
|
168
162
|
View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
|
data/ext/isotree/ext.cpp
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
#include <cmath>
|
|
3
3
|
#include <fstream>
|
|
4
4
|
#include <iostream>
|
|
5
|
+
#include <string>
|
|
6
|
+
#include <vector>
|
|
5
7
|
|
|
6
8
|
// isotree
|
|
7
9
|
#include <isotree.hpp>
|
|
@@ -11,21 +13,15 @@
|
|
|
11
13
|
|
|
12
14
|
using Rice::Array;
|
|
13
15
|
using Rice::Hash;
|
|
14
|
-
using Rice::Module;
|
|
15
16
|
using Rice::Object;
|
|
16
17
|
using Rice::String;
|
|
17
18
|
using Rice::Symbol;
|
|
18
|
-
using Rice::define_class_under;
|
|
19
|
-
using Rice::define_module;
|
|
20
19
|
|
|
21
|
-
namespace Rice::detail
|
|
22
|
-
{
|
|
20
|
+
namespace Rice::detail {
|
|
23
21
|
template<>
|
|
24
|
-
class From_Ruby<NewCategAction>
|
|
25
|
-
{
|
|
22
|
+
class From_Ruby<NewCategAction> {
|
|
26
23
|
public:
|
|
27
|
-
NewCategAction convert(VALUE x)
|
|
28
|
-
{
|
|
24
|
+
NewCategAction convert(VALUE x) {
|
|
29
25
|
auto value = Object(x).to_s().str();
|
|
30
26
|
if (value == "weighted" || value == "impute") return Weighted;
|
|
31
27
|
if (value == "smallest") return Smallest;
|
|
@@ -35,11 +31,9 @@ namespace Rice::detail
|
|
|
35
31
|
};
|
|
36
32
|
|
|
37
33
|
template<>
|
|
38
|
-
class From_Ruby<MissingAction>
|
|
39
|
-
{
|
|
34
|
+
class From_Ruby<MissingAction> {
|
|
40
35
|
public:
|
|
41
|
-
MissingAction convert(VALUE x)
|
|
42
|
-
{
|
|
36
|
+
MissingAction convert(VALUE x) {
|
|
43
37
|
auto value = Object(x).to_s().str();
|
|
44
38
|
if (value == "divide") return Divide;
|
|
45
39
|
if (value == "impute") return Impute;
|
|
@@ -49,11 +43,9 @@ namespace Rice::detail
|
|
|
49
43
|
};
|
|
50
44
|
|
|
51
45
|
template<>
|
|
52
|
-
class From_Ruby<CategSplit>
|
|
53
|
-
{
|
|
46
|
+
class From_Ruby<CategSplit> {
|
|
54
47
|
public:
|
|
55
|
-
CategSplit convert(VALUE x)
|
|
56
|
-
{
|
|
48
|
+
CategSplit convert(VALUE x) {
|
|
57
49
|
auto value = Object(x).to_s().str();
|
|
58
50
|
if (value == "subset") return SubSet;
|
|
59
51
|
if (value == "single_categ") return SingleCateg;
|
|
@@ -62,11 +54,9 @@ namespace Rice::detail
|
|
|
62
54
|
};
|
|
63
55
|
|
|
64
56
|
template<>
|
|
65
|
-
class From_Ruby<CoefType>
|
|
66
|
-
{
|
|
57
|
+
class From_Ruby<CoefType> {
|
|
67
58
|
public:
|
|
68
|
-
CoefType convert(VALUE x)
|
|
69
|
-
{
|
|
59
|
+
CoefType convert(VALUE x) {
|
|
70
60
|
auto value = Object(x).to_s().str();
|
|
71
61
|
if (value == "uniform") return Uniform;
|
|
72
62
|
if (value == "normal") return Normal;
|
|
@@ -75,11 +65,9 @@ namespace Rice::detail
|
|
|
75
65
|
};
|
|
76
66
|
|
|
77
67
|
template<>
|
|
78
|
-
class From_Ruby<UseDepthImp>
|
|
79
|
-
{
|
|
68
|
+
class From_Ruby<UseDepthImp> {
|
|
80
69
|
public:
|
|
81
|
-
UseDepthImp convert(VALUE x)
|
|
82
|
-
{
|
|
70
|
+
UseDepthImp convert(VALUE x) {
|
|
83
71
|
auto value = Object(x).to_s().str();
|
|
84
72
|
if (value == "lower") return Lower;
|
|
85
73
|
if (value == "higher") return Higher;
|
|
@@ -89,11 +77,9 @@ namespace Rice::detail
|
|
|
89
77
|
};
|
|
90
78
|
|
|
91
79
|
template<>
|
|
92
|
-
class From_Ruby<WeighImpRows>
|
|
93
|
-
{
|
|
80
|
+
class From_Ruby<WeighImpRows> {
|
|
94
81
|
public:
|
|
95
|
-
WeighImpRows convert(VALUE x)
|
|
96
|
-
{
|
|
82
|
+
WeighImpRows convert(VALUE x) {
|
|
97
83
|
auto value = Object(x).to_s().str();
|
|
98
84
|
if (value == "inverse") return Inverse;
|
|
99
85
|
if (value == "prop") return Prop;
|
|
@@ -103,11 +89,9 @@ namespace Rice::detail
|
|
|
103
89
|
};
|
|
104
90
|
|
|
105
91
|
template<>
|
|
106
|
-
class From_Ruby<ScoringMetric>
|
|
107
|
-
{
|
|
92
|
+
class From_Ruby<ScoringMetric> {
|
|
108
93
|
public:
|
|
109
|
-
ScoringMetric convert(VALUE x)
|
|
110
|
-
{
|
|
94
|
+
ScoringMetric convert(VALUE x) {
|
|
111
95
|
auto value = Object(x).to_s().str();
|
|
112
96
|
if (value == "depth") return Depth;
|
|
113
97
|
if (value == "adj_depth") return AdjDepth;
|
|
@@ -119,15 +103,14 @@ namespace Rice::detail
|
|
|
119
103
|
throw std::runtime_error("Unknown scoring metric: " + value);
|
|
120
104
|
}
|
|
121
105
|
};
|
|
122
|
-
}
|
|
106
|
+
} // namespace Rice::detail
|
|
123
107
|
|
|
124
108
|
extern "C"
|
|
125
|
-
void Init_ext()
|
|
126
|
-
|
|
127
|
-
Module rb_mIsoTree = define_module("IsoTree");
|
|
109
|
+
void Init_ext() {
|
|
110
|
+
Rice::Module rb_mIsoTree = Rice::define_module("IsoTree");
|
|
128
111
|
|
|
129
|
-
Module rb_mExt = define_module_under(rb_mIsoTree, "Ext");
|
|
130
|
-
define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
|
|
112
|
+
Rice::Module rb_mExt = Rice::define_module_under(rb_mIsoTree, "Ext");
|
|
113
|
+
Rice::define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
|
|
131
114
|
|
|
132
115
|
rb_mExt
|
|
133
116
|
.define_singleton_function(
|
|
@@ -325,7 +308,7 @@ void Init_ext()
|
|
|
325
308
|
|
|
326
309
|
Array ret;
|
|
327
310
|
for (size_t i = 0; i < outlier_scores.size(); i++) {
|
|
328
|
-
ret.push(outlier_scores[i]);
|
|
311
|
+
ret.push(outlier_scores[i], false);
|
|
329
312
|
}
|
|
330
313
|
return ret;
|
|
331
314
|
})
|
|
@@ -411,8 +394,8 @@ void Init_ext()
|
|
|
411
394
|
deserialize_combined(file, &model, &model_ext, &imputer, &indexer, optional_metadata);
|
|
412
395
|
file.close();
|
|
413
396
|
|
|
414
|
-
ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)));
|
|
415
|
-
ret.push(String(std::string(optional_metadata, size_metadata)));
|
|
397
|
+
ret.push(Object(Rice::detail::To_Ruby<ExtIsoForest>().convert(model_ext)), false);
|
|
398
|
+
ret.push(String(std::string(optional_metadata, size_metadata)), false);
|
|
416
399
|
|
|
417
400
|
free(optional_metadata);
|
|
418
401
|
|
|
data/lib/isotree/isolation_forest.rb
CHANGED
|
@@ -1,24 +1,44 @@
|
|
|
1
1
|
module IsoTree
|
|
2
2
|
class IsolationForest
|
|
3
3
|
def initialize(
|
|
4
|
-
sample_size: "auto",
|
|
4
|
+
sample_size: "auto",
|
|
5
|
+
ntrees: 500,
|
|
6
|
+
ndim: 3,
|
|
7
|
+
ntry: 1,
|
|
5
8
|
# categ_cols: nil,
|
|
6
|
-
max_depth: "auto",
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
max_depth: "auto",
|
|
10
|
+
ncols_per_tree: nil,
|
|
11
|
+
prob_pick_pooled_gain: 0.0,
|
|
12
|
+
prob_pick_avg_gain: 0.0,
|
|
13
|
+
prob_pick_full_gain: 0.0,
|
|
14
|
+
prob_pick_dens: 0.0,
|
|
15
|
+
prob_pick_col_by_range: 0.0,
|
|
16
|
+
prob_pick_col_by_var: 0.0,
|
|
17
|
+
prob_pick_col_by_kurt: 0.0,
|
|
18
|
+
min_gain: 0.0,
|
|
19
|
+
missing_action: "auto",
|
|
20
|
+
new_categ_action: "auto",
|
|
21
|
+
categ_split_type: "auto",
|
|
22
|
+
all_perm: false,
|
|
23
|
+
coef_by_prop: false,
|
|
12
24
|
# recode_categ: false,
|
|
13
25
|
weights_as_sample_prob: true,
|
|
14
|
-
sample_with_replacement: false,
|
|
15
|
-
|
|
16
|
-
|
|
26
|
+
sample_with_replacement: false,
|
|
27
|
+
penalize_range: false,
|
|
28
|
+
standardize_data: true,
|
|
29
|
+
scoring_metric: "depth",
|
|
30
|
+
fast_bratio: true,
|
|
31
|
+
weigh_by_kurtosis: false,
|
|
32
|
+
coefs: "uniform",
|
|
33
|
+
assume_full_distr: true,
|
|
17
34
|
# build_imputer: false,
|
|
18
|
-
min_imp_obs: 3,
|
|
19
|
-
|
|
35
|
+
min_imp_obs: 3,
|
|
36
|
+
depth_imp: "higher",
|
|
37
|
+
weigh_imp_rows: "inverse",
|
|
38
|
+
random_seed: 1,
|
|
39
|
+
use_long_double: false,
|
|
40
|
+
nthreads: -1
|
|
20
41
|
)
|
|
21
|
-
|
|
22
42
|
@sample_size = sample_size
|
|
23
43
|
@ntrees = ntrees
|
|
24
44
|
@ndim = ndim
|
data/lib/isotree/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: isotree
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.4.0
|
|
4
|
+
version: 0.4.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: rice
|
|
@@ -16,15 +15,14 @@ dependencies:
|
|
|
16
15
|
requirements:
|
|
17
16
|
- - ">="
|
|
18
17
|
- !ruby/object:Gem::Version
|
|
19
|
-
version: '4.
|
|
18
|
+
version: '4.7'
|
|
20
19
|
type: :runtime
|
|
21
20
|
prerelease: false
|
|
22
21
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
22
|
requirements:
|
|
24
23
|
- - ">="
|
|
25
24
|
- !ruby/object:Gem::Version
|
|
26
|
-
version: '4.
|
|
27
|
-
description:
|
|
25
|
+
version: '4.7'
|
|
28
26
|
email: andrew@ankane.org
|
|
29
27
|
executables: []
|
|
30
28
|
extensions:
|
|
@@ -88,7 +86,6 @@ homepage: https://github.com/ankane/isotree-ruby
|
|
|
88
86
|
licenses:
|
|
89
87
|
- BSD-2-Clause
|
|
90
88
|
metadata: {}
|
|
91
|
-
post_install_message:
|
|
92
89
|
rdoc_options: []
|
|
93
90
|
require_paths:
|
|
94
91
|
- lib
|
|
@@ -103,8 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
103
100
|
- !ruby/object:Gem::Version
|
|
104
101
|
version: '0'
|
|
105
102
|
requirements: []
|
|
106
|
-
rubygems_version: 3.
|
|
107
|
-
signing_key:
|
|
103
|
+
rubygems_version: 3.6.9
|
|
108
104
|
specification_version: 4
|
|
109
105
|
summary: Outlier/anomaly detection for Ruby using Isolation Forest
|
|
110
106
|
test_files: []
|