isotree 0.1.5 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -0
- data/README.md +9 -9
- data/ext/isotree/ext.cpp +86 -64
- data/ext/isotree/extconf.rb +1 -1
- data/lib/isotree/dataset.rb +0 -1
- data/lib/isotree/isolation_forest.rb +4 -0
- data/lib/isotree/version.rb +1 -1
- metadata +7 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1876c50ef4d9bbd7fc7898c222b9f10b8d287a38cf9d37af53e9841f63494299
|
|
4
|
+
data.tar.gz: ea7b60ae9683498df1910fb3f4774ce8c94daf9fe2a6e23ffd87a9d488bcc5ce
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 3d0f6d95cac8b7dd457512c0ce41c3ae286cf8f344ccc8a47dee1c00104657b3d2d9c97be1c1211fcb253065ba7fcf3d3b955a89dd9bac8b2b6e5e35516c7b7d
|
|
7
|
+
data.tar.gz: 476d7c8ffe5c252ba94ecc7a68e59fc3afbbb2a732fd9e3127e032e543a74957fcdef6173b6225dfee7370dbb86625b61b02d16a68cc7257be8f7cbdae87a581
|
data/CHANGELOG.md
CHANGED
|
@@ -1,3 +1,16 @@
|
|
|
1
|
+
## 0.2.2 (2022-06-12)
|
|
2
|
+
|
|
3
|
+
- Fixed segfault when data is smaller than sample size
|
|
4
|
+
|
|
5
|
+
## 0.2.1 (2021-05-23)
|
|
6
|
+
|
|
7
|
+
- Improved performance
|
|
8
|
+
|
|
9
|
+
## 0.2.0 (2021-05-17)
|
|
10
|
+
|
|
11
|
+
- Updated to Rice 4
|
|
12
|
+
- Dropped support for Ruby < 2.6
|
|
13
|
+
|
|
1
14
|
## 0.1.5 (2021-03-14)
|
|
2
15
|
|
|
3
16
|
- Updated Isotree to 0.1.25
|
data/README.md
CHANGED
|
@@ -1,19 +1,19 @@
|
|
|
1
|
-
# IsoTree
|
|
1
|
+
# IsoTree Ruby
|
|
2
2
|
|
|
3
3
|
:evergreen_tree: [IsoTree](https://github.com/david-cortes/isotree) - outlier/anomaly detection using Isolation Forest - for Ruby
|
|
4
4
|
|
|
5
5
|
Learn how [Isolation Forest](https://www.youtube.com/watch?v=RyFQXQf4w4w) works
|
|
6
6
|
|
|
7
|
-
:deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree) for human-readable explanations of outliers
|
|
7
|
+
:deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree-ruby) for human-readable explanations of outliers
|
|
8
8
|
|
|
9
|
-
[Build Status](https://github.com/ankane/isotree/actions)
|
|
9
|
+
[Build Status](https://github.com/ankane/isotree-ruby/actions)
|
|
10
10
|
|
|
11
11
|
## Installation
|
|
12
12
|
|
|
13
13
|
Add this line to your application’s Gemfile:
|
|
14
14
|
|
|
15
15
|
```ruby
|
|
16
|
-
gem 'isotree'
|
|
16
|
+
gem "isotree"
|
|
17
17
|
```
|
|
18
18
|
|
|
19
19
|
## Getting Started
|
|
@@ -147,22 +147,22 @@ model.predict(data, output: "avg_depth")
|
|
|
147
147
|
|
|
148
148
|
## History
|
|
149
149
|
|
|
150
|
-
View the [changelog](https://github.com/ankane/isotree/blob/master/CHANGELOG.md)
|
|
150
|
+
View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
|
|
151
151
|
|
|
152
152
|
## Contributing
|
|
153
153
|
|
|
154
154
|
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
|
155
155
|
|
|
156
|
-
- [Report bugs](https://github.com/ankane/isotree/issues)
|
|
157
|
-
- Fix bugs and [submit pull requests](https://github.com/ankane/isotree/pulls)
|
|
156
|
+
- [Report bugs](https://github.com/ankane/isotree-ruby/issues)
|
|
157
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/isotree-ruby/pulls)
|
|
158
158
|
- Write, clarify, or fix documentation
|
|
159
159
|
- Suggest or add new features
|
|
160
160
|
|
|
161
161
|
To get started with development:
|
|
162
162
|
|
|
163
163
|
```sh
|
|
164
|
-
git clone --recursive https://github.com/ankane/isotree.git
|
|
165
|
-
cd isotree
|
|
164
|
+
git clone --recursive https://github.com/ankane/isotree-ruby.git
|
|
165
|
+
cd isotree-ruby
|
|
166
166
|
bundle install
|
|
167
167
|
bundle exec rake compile
|
|
168
168
|
bundle exec rake test
|
data/ext/isotree/ext.cpp
CHANGED
|
@@ -2,12 +2,7 @@
|
|
|
2
2
|
#include <isotree.hpp>
|
|
3
3
|
|
|
4
4
|
// rice
|
|
5
|
-
#include <rice/Array.hpp>
|
|
6
|
-
#include <rice/Hash.hpp>
|
|
7
|
-
#include <rice/Module.hpp>
|
|
8
|
-
#include <rice/Object.hpp>
|
|
9
|
-
#include <rice/String.hpp>
|
|
10
|
-
#include <rice/Symbol.hpp>
|
|
5
|
+
#include <rice/rice.hpp>
|
|
11
6
|
|
|
12
7
|
using Rice::Array;
|
|
13
8
|
using Rice::Hash;
|
|
@@ -18,62 +13,89 @@ using Rice::Symbol;
|
|
|
18
13
|
using Rice::define_class_under;
|
|
19
14
|
using Rice::define_module;
|
|
20
15
|
|
|
21
|
-
|
|
22
|
-
NewCategAction from_ruby<NewCategAction>(Object x)
|
|
16
|
+
namespace Rice::detail
|
|
23
17
|
{
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
18
|
+
template<>
|
|
19
|
+
class From_Ruby<NewCategAction>
|
|
20
|
+
{
|
|
21
|
+
public:
|
|
22
|
+
NewCategAction convert(VALUE x)
|
|
23
|
+
{
|
|
24
|
+
auto value = Object(x).to_s().str();
|
|
25
|
+
if (value == "weighted") return Weighted;
|
|
26
|
+
if (value == "smallest") return Smallest;
|
|
27
|
+
if (value == "random") return Random;
|
|
28
|
+
throw std::runtime_error("Unknown new categ action: " + value);
|
|
29
|
+
}
|
|
30
|
+
};
|
|
30
31
|
|
|
31
|
-
template<>
|
|
32
|
-
|
|
33
|
-
{
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
32
|
+
template<>
|
|
33
|
+
class From_Ruby<MissingAction>
|
|
34
|
+
{
|
|
35
|
+
public:
|
|
36
|
+
MissingAction convert(VALUE x)
|
|
37
|
+
{
|
|
38
|
+
auto value = Object(x).to_s().str();
|
|
39
|
+
if (value == "divide") return Divide;
|
|
40
|
+
if (value == "impute") return Impute;
|
|
41
|
+
if (value == "fail") return Fail;
|
|
42
|
+
throw std::runtime_error("Unknown missing action: " + value);
|
|
43
|
+
}
|
|
44
|
+
};
|
|
40
45
|
|
|
41
|
-
template<>
|
|
42
|
-
|
|
43
|
-
{
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
template<>
|
|
47
|
+
class From_Ruby<CategSplit>
|
|
48
|
+
{
|
|
49
|
+
public:
|
|
50
|
+
CategSplit convert(VALUE x)
|
|
51
|
+
{
|
|
52
|
+
auto value = Object(x).to_s().str();
|
|
53
|
+
if (value == "subset") return SubSet;
|
|
54
|
+
if (value == "single_categ") return SingleCateg;
|
|
55
|
+
throw std::runtime_error("Unknown categ split: " + value);
|
|
56
|
+
}
|
|
57
|
+
};
|
|
49
58
|
|
|
50
|
-
template<>
|
|
51
|
-
|
|
52
|
-
{
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
59
|
+
template<>
|
|
60
|
+
class From_Ruby<CoefType>
|
|
61
|
+
{
|
|
62
|
+
public:
|
|
63
|
+
CoefType convert(VALUE x)
|
|
64
|
+
{
|
|
65
|
+
auto value = Object(x).to_s().str();
|
|
66
|
+
if (value == "uniform") return Uniform;
|
|
67
|
+
if (value == "normal") return Normal;
|
|
68
|
+
throw std::runtime_error("Unknown coef type: " + value);
|
|
69
|
+
}
|
|
70
|
+
};
|
|
58
71
|
|
|
59
|
-
template<>
|
|
60
|
-
|
|
61
|
-
{
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
72
|
+
template<>
|
|
73
|
+
class From_Ruby<UseDepthImp>
|
|
74
|
+
{
|
|
75
|
+
public:
|
|
76
|
+
UseDepthImp convert(VALUE x)
|
|
77
|
+
{
|
|
78
|
+
auto value = Object(x).to_s().str();
|
|
79
|
+
if (value == "lower") return Lower;
|
|
80
|
+
if (value == "higher") return Higher;
|
|
81
|
+
if (value == "same") return Same;
|
|
82
|
+
throw std::runtime_error("Unknown depth imp: " + value);
|
|
83
|
+
}
|
|
84
|
+
};
|
|
68
85
|
|
|
69
|
-
template<>
|
|
70
|
-
|
|
71
|
-
{
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
86
|
+
template<>
|
|
87
|
+
class From_Ruby<WeighImpRows>
|
|
88
|
+
{
|
|
89
|
+
public:
|
|
90
|
+
WeighImpRows convert(VALUE x)
|
|
91
|
+
{
|
|
92
|
+
auto value = Object(x).to_s().str();
|
|
93
|
+
if (value == "inverse") return Inverse;
|
|
94
|
+
if (value == "prop") return Prop;
|
|
95
|
+
if (value == "flat") return Flat;
|
|
96
|
+
throw std::runtime_error("Unknown weight imp rows: " + value);
|
|
97
|
+
}
|
|
98
|
+
};
|
|
77
99
|
}
|
|
78
100
|
|
|
79
101
|
extern "C"
|
|
@@ -85,9 +107,9 @@ void Init_ext()
|
|
|
85
107
|
define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
|
|
86
108
|
|
|
87
109
|
rb_mExt
|
|
88
|
-
.
|
|
110
|
+
.define_singleton_function(
|
|
89
111
|
"fit_iforest",
|
|
90
|
-
|
|
112
|
+
[](Hash options) {
|
|
91
113
|
// model
|
|
92
114
|
ExtIsoForest iso;
|
|
93
115
|
|
|
@@ -204,9 +226,9 @@ void Init_ext()
|
|
|
204
226
|
|
|
205
227
|
return iso;
|
|
206
228
|
})
|
|
207
|
-
.
|
|
229
|
+
.define_singleton_function(
|
|
208
230
|
"predict_iforest",
|
|
209
|
-
|
|
231
|
+
[](ExtIsoForest& iso, Hash options) {
|
|
210
232
|
// data
|
|
211
233
|
size_t nrows = options.get<size_t, Symbol>("nrows");
|
|
212
234
|
size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
|
|
@@ -260,9 +282,9 @@ void Init_ext()
|
|
|
260
282
|
}
|
|
261
283
|
return ret;
|
|
262
284
|
})
|
|
263
|
-
.
|
|
285
|
+
.define_singleton_function(
|
|
264
286
|
"serialize_ext_isoforest",
|
|
265
|
-
|
|
287
|
+
[](ExtIsoForest& iso, String path) {
|
|
266
288
|
#ifdef _MSC_VER
|
|
267
289
|
// TODO convert to wchar_t
|
|
268
290
|
throw std::runtime_error("Not supported on Windows yet");
|
|
@@ -270,9 +292,9 @@ void Init_ext()
|
|
|
270
292
|
serialize_ext_isoforest(iso, path.c_str());
|
|
271
293
|
#endif
|
|
272
294
|
})
|
|
273
|
-
.
|
|
295
|
+
.define_singleton_function(
|
|
274
296
|
"deserialize_ext_isoforest",
|
|
275
|
-
|
|
297
|
+
[](String path) {
|
|
276
298
|
ExtIsoForest iso;
|
|
277
299
|
|
|
278
300
|
#ifdef _MSC_VER
|
data/ext/isotree/extconf.rb
CHANGED
data/lib/isotree/dataset.rb
CHANGED
|
@@ -32,7 +32,6 @@ module IsoTree
|
|
|
32
32
|
raise ArgumentError, "Array elements must be all hashes or arrays"
|
|
33
33
|
end
|
|
34
34
|
|
|
35
|
-
nrows = data.size
|
|
36
35
|
ncols = data.first ? data.first.size : 0
|
|
37
36
|
if data.any? { |r| r.size != ncols }
|
|
38
37
|
raise ArgumentError, "All rows must have the same number of columns"
|
|
@@ -44,6 +44,10 @@ module IsoTree
|
|
|
44
44
|
prep_fit(x)
|
|
45
45
|
options = data_options(x).merge(fit_options)
|
|
46
46
|
options[:sample_size] ||= options[:nrows]
|
|
47
|
+
|
|
48
|
+
# prevent segfault
|
|
49
|
+
options[:sample_size] = options[:nrows] if options[:sample_size] > options[:nrows]
|
|
50
|
+
|
|
47
51
|
@ext_iso_forest = Ext.fit_iforest(options)
|
|
48
52
|
end
|
|
49
53
|
|
data/lib/isotree/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: isotree
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.5
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2022-06-13 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rice
|
|
@@ -16,14 +16,14 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version:
|
|
19
|
+
version: 4.0.2
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version:
|
|
26
|
+
version: 4.0.2
|
|
27
27
|
description:
|
|
28
28
|
email: andrew@ankane.org
|
|
29
29
|
executables: []
|
|
@@ -149,7 +149,7 @@ files:
|
|
|
149
149
|
- vendor/isotree/src/serialize.cpp
|
|
150
150
|
- vendor/isotree/src/sql.cpp
|
|
151
151
|
- vendor/isotree/src/utils.cpp
|
|
152
|
-
homepage: https://github.com/ankane/isotree
|
|
152
|
+
homepage: https://github.com/ankane/isotree-ruby
|
|
153
153
|
licenses:
|
|
154
154
|
- BSD-2-Clause
|
|
155
155
|
metadata: {}
|
|
@@ -161,14 +161,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
|
161
161
|
requirements:
|
|
162
162
|
- - ">="
|
|
163
163
|
- !ruby/object:Gem::Version
|
|
164
|
-
version: '2.
|
|
164
|
+
version: '2.6'
|
|
165
165
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
166
166
|
requirements:
|
|
167
167
|
- - ">="
|
|
168
168
|
- !ruby/object:Gem::Version
|
|
169
169
|
version: '0'
|
|
170
170
|
requirements: []
|
|
171
|
-
rubygems_version: 3.
|
|
171
|
+
rubygems_version: 3.3.7
|
|
172
172
|
signing_key:
|
|
173
173
|
specification_version: 4
|
|
174
174
|
summary: Outlier/anomaly detection for Ruby using Isolation Forest
|