isotree 0.1.5 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5974c65c4adc4fd79ee7770f324c906a2788534f3fe6f381d61711e7cdce78c
4
- data.tar.gz: d401e7c5aaabcd5dcffd5e4f54d93ea4d0966698854e70d3475fd79a92e1e241
3
+ metadata.gz: 1876c50ef4d9bbd7fc7898c222b9f10b8d287a38cf9d37af53e9841f63494299
4
+ data.tar.gz: ea7b60ae9683498df1910fb3f4774ce8c94daf9fe2a6e23ffd87a9d488bcc5ce
5
5
  SHA512:
6
- metadata.gz: d00de0c3902b4f7fd3e13e08f0738ec4daf8be173f60cf46f49318e76f5be3c4ef3edd925f83074e03b4fe181aa6384d43c749f5f72247cf08d44f6811a6fe80
7
- data.tar.gz: 70195ae442c2e4762b2f900a82a891d8f991ace1e521d625f910a1b5e3482334a299674c255e1aebf855728ee169ab5886428baf6df9a5e436bc73f3aff6dda6
6
+ metadata.gz: 3d0f6d95cac8b7dd457512c0ce41c3ae286cf8f344ccc8a47dee1c00104657b3d2d9c97be1c1211fcb253065ba7fcf3d3b955a89dd9bac8b2b6e5e35516c7b7d
7
+ data.tar.gz: 476d7c8ffe5c252ba94ecc7a68e59fc3afbbb2a732fd9e3127e032e543a74957fcdef6173b6225dfee7370dbb86625b61b02d16a68cc7257be8f7cbdae87a581
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ## 0.2.2 (2022-06-12)
2
+
3
+ - Fixed segfault when data is smaller than sample size
4
+
5
+ ## 0.2.1 (2021-05-23)
6
+
7
+ - Improved performance
8
+
9
+ ## 0.2.0 (2021-05-17)
10
+
11
+ - Updated to Rice 4
12
+ - Dropped support for Ruby < 2.6
13
+
1
14
  ## 0.1.5 (2021-03-14)
2
15
 
3
16
  - Updated Isotree to 0.1.25
data/README.md CHANGED
@@ -1,19 +1,19 @@
1
- # IsoTree
1
+ # IsoTree Ruby
2
2
 
3
3
  :evergreen_tree: [IsoTree](https://github.com/david-cortes/isotree) - outlier/anomaly detection using Isolation Forest - for Ruby
4
4
 
5
5
  Learn how [Isolation Forest](https://www.youtube.com/watch?v=RyFQXQf4w4w) works
6
6
 
7
- :deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree) for human-readable explanations of outliers
7
+ :deciduous_tree: Check out [OutlierTree](https://github.com/ankane/outliertree-ruby) for human-readable explanations of outliers
8
8
 
9
- [![Build Status](https://github.com/ankane/isotree/workflows/build/badge.svg?branch=master)](https://github.com/ankane/isotree/actions)
9
+ [![Build Status](https://github.com/ankane/isotree-ruby/workflows/build/badge.svg?branch=master)](https://github.com/ankane/isotree-ruby/actions)
10
10
 
11
11
  ## Installation
12
12
 
13
13
  Add this line to your application’s Gemfile:
14
14
 
15
15
  ```ruby
16
- gem 'isotree'
16
+ gem "isotree"
17
17
  ```
18
18
 
19
19
  ## Getting Started
@@ -147,22 +147,22 @@ model.predict(data, output: "avg_depth")
147
147
 
148
148
  ## History
149
149
 
150
- View the [changelog](https://github.com/ankane/isotree/blob/master/CHANGELOG.md)
150
+ View the [changelog](https://github.com/ankane/isotree-ruby/blob/master/CHANGELOG.md)
151
151
 
152
152
  ## Contributing
153
153
 
154
154
  Everyone is encouraged to help improve this project. Here are a few ways you can help:
155
155
 
156
- - [Report bugs](https://github.com/ankane/isotree/issues)
157
- - Fix bugs and [submit pull requests](https://github.com/ankane/isotree/pulls)
156
+ - [Report bugs](https://github.com/ankane/isotree-ruby/issues)
157
+ - Fix bugs and [submit pull requests](https://github.com/ankane/isotree-ruby/pulls)
158
158
  - Write, clarify, or fix documentation
159
159
  - Suggest or add new features
160
160
 
161
161
  To get started with development:
162
162
 
163
163
  ```sh
164
- git clone --recursive https://github.com/ankane/isotree.git
165
- cd isotree
164
+ git clone --recursive https://github.com/ankane/isotree-ruby.git
165
+ cd isotree-ruby
166
166
  bundle install
167
167
  bundle exec rake compile
168
168
  bundle exec rake test
data/ext/isotree/ext.cpp CHANGED
@@ -2,12 +2,7 @@
2
2
  #include <isotree.hpp>
3
3
 
4
4
  // rice
5
- #include <rice/Array.hpp>
6
- #include <rice/Hash.hpp>
7
- #include <rice/Module.hpp>
8
- #include <rice/Object.hpp>
9
- #include <rice/String.hpp>
10
- #include <rice/Symbol.hpp>
5
+ #include <rice/rice.hpp>
11
6
 
12
7
  using Rice::Array;
13
8
  using Rice::Hash;
@@ -18,62 +13,89 @@ using Rice::Symbol;
18
13
  using Rice::define_class_under;
19
14
  using Rice::define_module;
20
15
 
21
- template<>
22
- NewCategAction from_ruby<NewCategAction>(Object x)
16
+ namespace Rice::detail
23
17
  {
24
- auto value = x.to_s().str();
25
- if (value == "weighted") return Weighted;
26
- if (value == "smallest") return Smallest;
27
- if (value == "random") return Random;
28
- throw std::runtime_error("Unknown new categ action: " + value);
29
- }
18
+ template<>
19
+ class From_Ruby<NewCategAction>
20
+ {
21
+ public:
22
+ NewCategAction convert(VALUE x)
23
+ {
24
+ auto value = Object(x).to_s().str();
25
+ if (value == "weighted") return Weighted;
26
+ if (value == "smallest") return Smallest;
27
+ if (value == "random") return Random;
28
+ throw std::runtime_error("Unknown new categ action: " + value);
29
+ }
30
+ };
30
31
 
31
- template<>
32
- MissingAction from_ruby<MissingAction>(Object x)
33
- {
34
- auto value = x.to_s().str();
35
- if (value == "divide") return Divide;
36
- if (value == "impute") return Impute;
37
- if (value == "fail") return Fail;
38
- throw std::runtime_error("Unknown missing action: " + value);
39
- }
32
+ template<>
33
+ class From_Ruby<MissingAction>
34
+ {
35
+ public:
36
+ MissingAction convert(VALUE x)
37
+ {
38
+ auto value = Object(x).to_s().str();
39
+ if (value == "divide") return Divide;
40
+ if (value == "impute") return Impute;
41
+ if (value == "fail") return Fail;
42
+ throw std::runtime_error("Unknown missing action: " + value);
43
+ }
44
+ };
40
45
 
41
- template<>
42
- CategSplit from_ruby<CategSplit>(Object x)
43
- {
44
- auto value = x.to_s().str();
45
- if (value == "subset") return SubSet;
46
- if (value == "single_categ") return SingleCateg;
47
- throw std::runtime_error("Unknown categ split: " + value);
48
- }
46
+ template<>
47
+ class From_Ruby<CategSplit>
48
+ {
49
+ public:
50
+ CategSplit convert(VALUE x)
51
+ {
52
+ auto value = Object(x).to_s().str();
53
+ if (value == "subset") return SubSet;
54
+ if (value == "single_categ") return SingleCateg;
55
+ throw std::runtime_error("Unknown categ split: " + value);
56
+ }
57
+ };
49
58
 
50
- template<>
51
- CoefType from_ruby<CoefType>(Object x)
52
- {
53
- auto value = x.to_s().str();
54
- if (value == "uniform") return Uniform;
55
- if (value == "normal") return Normal;
56
- throw std::runtime_error("Unknown coef type: " + value);
57
- }
59
+ template<>
60
+ class From_Ruby<CoefType>
61
+ {
62
+ public:
63
+ CoefType convert(VALUE x)
64
+ {
65
+ auto value = Object(x).to_s().str();
66
+ if (value == "uniform") return Uniform;
67
+ if (value == "normal") return Normal;
68
+ throw std::runtime_error("Unknown coef type: " + value);
69
+ }
70
+ };
58
71
 
59
- template<>
60
- UseDepthImp from_ruby<UseDepthImp>(Object x)
61
- {
62
- auto value = x.to_s().str();
63
- if (value == "lower") return Lower;
64
- if (value == "higher") return Higher;
65
- if (value == "same") return Same;
66
- throw std::runtime_error("Unknown depth imp: " + value);
67
- }
72
+ template<>
73
+ class From_Ruby<UseDepthImp>
74
+ {
75
+ public:
76
+ UseDepthImp convert(VALUE x)
77
+ {
78
+ auto value = Object(x).to_s().str();
79
+ if (value == "lower") return Lower;
80
+ if (value == "higher") return Higher;
81
+ if (value == "same") return Same;
82
+ throw std::runtime_error("Unknown depth imp: " + value);
83
+ }
84
+ };
68
85
 
69
- template<>
70
- WeighImpRows from_ruby<WeighImpRows>(Object x)
71
- {
72
- auto value = x.to_s().str();
73
- if (value == "inverse") return Inverse;
74
- if (value == "prop") return Prop;
75
- if (value == "flat") return Flat;
76
- throw std::runtime_error("Unknown weight imp rows: " + value);
86
+ template<>
87
+ class From_Ruby<WeighImpRows>
88
+ {
89
+ public:
90
+ WeighImpRows convert(VALUE x)
91
+ {
92
+ auto value = Object(x).to_s().str();
93
+ if (value == "inverse") return Inverse;
94
+ if (value == "prop") return Prop;
95
+ if (value == "flat") return Flat;
96
+ throw std::runtime_error("Unknown weight imp rows: " + value);
97
+ }
98
+ };
77
99
  }
78
100
 
79
101
  extern "C"
@@ -85,9 +107,9 @@ void Init_ext()
85
107
  define_class_under<ExtIsoForest>(rb_mExt, "ExtIsoForest");
86
108
 
87
109
  rb_mExt
88
- .define_singleton_method(
110
+ .define_singleton_function(
89
111
  "fit_iforest",
90
- *[](Hash options) {
112
+ [](Hash options) {
91
113
  // model
92
114
  ExtIsoForest iso;
93
115
 
@@ -204,9 +226,9 @@ void Init_ext()
204
226
 
205
227
  return iso;
206
228
  })
207
- .define_singleton_method(
229
+ .define_singleton_function(
208
230
  "predict_iforest",
209
- *[](ExtIsoForest& iso, Hash options) {
231
+ [](ExtIsoForest& iso, Hash options) {
210
232
  // data
211
233
  size_t nrows = options.get<size_t, Symbol>("nrows");
212
234
  size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
@@ -260,9 +282,9 @@ void Init_ext()
260
282
  }
261
283
  return ret;
262
284
  })
263
- .define_singleton_method(
285
+ .define_singleton_function(
264
286
  "serialize_ext_isoforest",
265
- *[](ExtIsoForest& iso, String path) {
287
+ [](ExtIsoForest& iso, String path) {
266
288
  #ifdef _MSC_VER
267
289
  // TODO convert to wchar_t
268
290
  throw std::runtime_error("Not supported on Windows yet");
@@ -270,9 +292,9 @@ void Init_ext()
270
292
  serialize_ext_isoforest(iso, path.c_str());
271
293
  #endif
272
294
  })
273
- .define_singleton_method(
295
+ .define_singleton_function(
274
296
  "deserialize_ext_isoforest",
275
- *[](String path) {
297
+ [](String path) {
276
298
  ExtIsoForest iso;
277
299
 
278
300
  #ifdef _MSC_VER
@@ -1,6 +1,6 @@
1
1
  require "mkmf-rice"
2
2
 
3
- $CXXFLAGS += " -std=c++11 -D_USE_MERSENNE_TWISTER -D_ENABLE_CEREAL"
3
+ $CXXFLAGS += " -std=c++17 $(optflags) -D_USE_MERSENNE_TWISTER -D_ENABLE_CEREAL"
4
4
 
5
5
  apple_clang = RbConfig::CONFIG["CC_VERSION_MESSAGE"] =~ /apple clang/i
6
6
 
@@ -32,7 +32,6 @@ module IsoTree
32
32
  raise ArgumentError, "Array elements must be all hashes or arrays"
33
33
  end
34
34
 
35
- nrows = data.size
36
35
  ncols = data.first ? data.first.size : 0
37
36
  if data.any? { |r| r.size != ncols }
38
37
  raise ArgumentError, "All rows must have the same number of columns"
@@ -44,6 +44,10 @@ module IsoTree
44
44
  prep_fit(x)
45
45
  options = data_options(x).merge(fit_options)
46
46
  options[:sample_size] ||= options[:nrows]
47
+
48
+ # prevent segfault
49
+ options[:sample_size] = options[:nrows] if options[:sample_size] > options[:nrows]
50
+
47
51
  @ext_iso_forest = Ext.fit_iforest(options)
48
52
  end
49
53
 
@@ -1,3 +1,3 @@
1
1
  module IsoTree
2
- VERSION = "0.1.5"
2
+ VERSION = "0.2.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: isotree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-03-15 00:00:00.000000000 Z
11
+ date: 2022-06-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rice
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '2.2'
19
+ version: 4.0.2
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '2.2'
26
+ version: 4.0.2
27
27
  description:
28
28
  email: andrew@ankane.org
29
29
  executables: []
@@ -149,7 +149,7 @@ files:
149
149
  - vendor/isotree/src/serialize.cpp
150
150
  - vendor/isotree/src/sql.cpp
151
151
  - vendor/isotree/src/utils.cpp
152
- homepage: https://github.com/ankane/isotree
152
+ homepage: https://github.com/ankane/isotree-ruby
153
153
  licenses:
154
154
  - BSD-2-Clause
155
155
  metadata: {}
@@ -161,14 +161,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
161
161
  requirements:
162
162
  - - ">="
163
163
  - !ruby/object:Gem::Version
164
- version: '2.5'
164
+ version: '2.6'
165
165
  required_rubygems_version: !ruby/object:Gem::Requirement
166
166
  requirements:
167
167
  - - ">="
168
168
  - !ruby/object:Gem::Version
169
169
  version: '0'
170
170
  requirements: []
171
- rubygems_version: 3.2.3
171
+ rubygems_version: 3.3.7
172
172
  signing_key:
173
173
  specification_version: 4
174
174
  summary: Outlier/anomaly detection for Ruby using Isolation Forest