outliertree 0.4.1 → 0.5.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b2c75c112439d30795757595ab3d34de3fbe80049f1b03e7168f0d1eed405417
4
- data.tar.gz: e08ddb70bdf7b1be287b2986fc9364aa554db465fc2ef11b193b8f2adbd7af19
3
+ metadata.gz: ef5a7a99c49ab6a263bfac8293d85a914fb21794566245737640001315d34cc8
4
+ data.tar.gz: 29a5aec79b8e24912b422c279fed5ca58de62775296ee1aaad6af2b0fdbbad7a
5
5
  SHA512:
6
- metadata.gz: 8bf35fb1fad7023c10b5b7514281d6db9c57faacfa8fcbaaa5e39a2aefb43508f5ed1c9c4cac7dd7e96052fac5e0c6036de201223ec3df7241051426e67a349c
7
- data.tar.gz: 36d0ffa632d62be0f2ebe73bb7cace6b4f31eddf6202f26afe8250f27a8bbf25606be9ca8e9b5c89a2596b3acdc0c7124cf039d7b5e9557902a08ae233416293
6
+ metadata.gz: 4b67ff777a77cbd3b01617583deed6c2a033e28b9d7f57721d201e1adc5d72a3ec5ccaa6e4f41337dd607f594dd3a9a1de8ace4ffd55726bcf92b18631bc70cd
7
+ data.tar.gz: 020d5831ea8abedbcc25c130ea7ed2b009d9126ca64e61ecc27cf051cbe451ec5bd6ec01e0b606c7ff834d252119a19da80a199ecab1fab549caa5958f00ae88
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.5.0 (2026-04-07)
2
+
3
+ - Dropped support for Ruby < 3.3
4
+
5
+ ## 0.4.2 (2025-10-26)
6
+
7
+ - Fixed error with Rice 4.7
8
+
1
9
  ## 0.4.1 (2025-04-23)
2
10
 
3
11
  - Updated OutlierTree to 1.10.0
data/NOTICE.txt CHANGED
@@ -1,5 +1,5 @@
1
1
  Copyright (C) 2019-2020 David Cortes
2
- Copyright (C) 2020-2022 Andrew Kane
2
+ Copyright (C) 2020-2026 Andrew Kane
3
3
 
4
4
  This program is free software: you can redistribute it and/or modify
5
5
  it under the terms of the GNU General Public License as published by
@@ -1,73 +1,55 @@
1
+ #include <cstddef>
2
+ #include <stdexcept>
3
+ #include <vector>
4
+
1
5
  // outliertree
2
6
  #include <outlier_tree.hpp>
3
7
 
8
+ // fix warning
9
+ #undef restrict
10
+
4
11
  // rice
5
12
  #include <rice/rice.hpp>
6
- #include <rice/stl.hpp>
7
13
 
8
- using Rice::Array;
9
14
  using Rice::Hash;
10
- using Rice::Module;
11
- using Rice::Object;
12
15
  using Rice::String;
13
16
  using Rice::Symbol;
14
- using Rice::define_class_under;
15
- using Rice::define_module;
16
17
 
17
- namespace Rice::detail
18
- {
18
+ namespace Rice::detail {
19
19
  template<typename T>
20
- class To_Ruby<std::vector<T>>
21
- {
20
+ class To_Ruby<std::vector<T>> {
22
21
  public:
23
- VALUE convert(std::vector<T> const & x)
24
- {
25
- auto a = rb_ary_new2(x.size());
26
- for (const auto& v : x) {
27
- rb_ary_push(a, To_Ruby<T>().convert(v));
28
- }
29
- return a;
30
- }
31
- };
22
+ To_Ruby() = default;
32
23
 
33
- template<>
34
- class To_Ruby<std::vector<signed char>>
35
- {
36
- public:
37
- VALUE convert(std::vector<signed char> const & x)
38
- {
39
- auto a = rb_ary_new2(x.size());
24
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
25
+
26
+ VALUE convert(const std::vector<T>& x) {
27
+ auto a = detail::protect(rb_ary_new2, x.size());
40
28
  for (const auto& v : x) {
41
- rb_ary_push(a, To_Ruby<signed char>().convert(v));
29
+ detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
42
30
  }
43
31
  return a;
44
32
  }
45
- };
46
33
 
47
- template<>
48
- struct Type<std::vector<signed char>>
49
- {
50
- static bool verify()
51
- {
52
- return true;
53
- }
34
+ private:
35
+ Arg* arg_ = nullptr;
54
36
  };
55
37
 
56
38
  template<>
57
- struct Type<ColType>
58
- {
59
- static bool verify()
60
- {
39
+ struct Type<ColType> {
40
+ static bool verify() {
61
41
  return true;
62
42
  }
63
43
  };
64
44
 
65
45
  template<>
66
- class To_Ruby<ColType>
67
- {
46
+ class To_Ruby<ColType> {
68
47
  public:
69
- VALUE convert(ColType const & x)
70
- {
48
+ To_Ruby() = default;
49
+
50
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
51
+
52
+ VALUE convert(ColType const & x) {
71
53
  switch (x) {
72
54
  case Numeric: return Symbol("numeric");
73
55
  case Categorical: return Symbol("categorical");
@@ -76,23 +58,26 @@ namespace Rice::detail
76
58
  }
77
59
  throw std::runtime_error("Unknown column type");
78
60
  }
61
+
62
+ private:
63
+ Arg* arg_ = nullptr;
79
64
  };
80
65
 
81
66
  template<>
82
- struct Type<SplitType>
83
- {
84
- static bool verify()
85
- {
67
+ struct Type<SplitType> {
68
+ static bool verify() {
86
69
  return true;
87
70
  }
88
71
  };
89
72
 
90
73
  template<>
91
- class To_Ruby<SplitType>
92
- {
74
+ class To_Ruby<SplitType> {
93
75
  public:
94
- VALUE convert(SplitType const & x)
95
- {
76
+ To_Ruby() = default;
77
+
78
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
79
+
80
+ VALUE convert(SplitType const & x) {
96
81
  switch (x) {
97
82
  case LessOrEqual: return Symbol("less_or_equal");
98
83
  case Greater: return Symbol("greater");
@@ -107,16 +92,18 @@ namespace Rice::detail
107
92
  }
108
93
  throw std::runtime_error("Unknown split type");
109
94
  }
95
+
96
+ private:
97
+ Arg* arg_ = nullptr;
110
98
  };
111
- }
99
+ } // namespace Rice::detail
112
100
 
113
101
  extern "C"
114
- void Init_ext()
115
- {
116
- Module rb_mOutlierTree = define_module("OutlierTree");
117
- Module rb_mExt = define_module_under(rb_mOutlierTree, "Ext");
102
+ void Init_ext() {
103
+ Rice::Module rb_mOutlierTree = Rice::define_module("OutlierTree");
104
+ Rice::Module rb_mExt = Rice::define_module_under(rb_mOutlierTree, "Ext");
118
105
 
119
- define_class_under<Cluster>(rb_mExt, "Cluster")
106
+ Rice::define_class_under<Cluster>(rb_mExt, "Cluster")
120
107
  .define_method("upper_lim", [](Cluster& self) { return self.upper_lim; })
121
108
  .define_method("display_lim_high", [](Cluster& self) { return self.display_lim_high; })
122
109
  .define_method("perc_below", [](Cluster& self) { return self.perc_below; })
@@ -133,7 +120,7 @@ void Init_ext()
133
120
  .define_method("has_na_branch", [](Cluster& self) { return self.has_NA_branch; })
134
121
  .define_method("col_num", [](Cluster& self) { return self.col_num; });
135
122
 
136
- define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
123
+ Rice::define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
137
124
  .define_method("parent_branch", [](ClusterTree& self) { return self.parent_branch; })
138
125
  .define_method("parent", [](ClusterTree& self) { return self.parent; })
139
126
  .define_method("all_branches", [](ClusterTree& self) { return self.all_branches; })
@@ -143,7 +130,7 @@ void Init_ext()
143
130
  .define_method("split_subset", [](ClusterTree& self) { return self.split_subset; })
144
131
  .define_method("split_lev", [](ClusterTree& self) { return self.split_lev; });
145
132
 
146
- define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
133
+ Rice::define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
147
134
  .define_method("outlier_scores_final", [](ModelOutputs& self) { return self.outlier_scores_final; })
148
135
  .define_method("outlier_columns_final", [](ModelOutputs& self) { return self.outlier_columns_final; })
149
136
  .define_method("outlier_clusters_final", [](ModelOutputs& self) { return self.outlier_clusters_final; })
@@ -154,12 +141,12 @@ void Init_ext()
154
141
  .define_method(
155
142
  "all_clusters",
156
143
  [](ModelOutputs& self, size_t i, size_t j) {
157
- return self.all_clusters[i][j];
144
+ return self.all_clusters.at(i).at(j);
158
145
  })
159
146
  .define_method(
160
147
  "all_trees",
161
148
  [](ModelOutputs& self, size_t i, size_t j) {
162
- return self.all_trees[i][j];
149
+ return self.all_trees.at(i).at(j);
163
150
  });
164
151
 
165
152
  rb_mExt
@@ -169,47 +156,47 @@ void Init_ext()
169
156
  ModelOutputs model_outputs;
170
157
 
171
158
  // data
172
- size_t nrows = options.get<size_t, Symbol>("nrows");
173
- size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
174
- size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
175
- size_t ncols_ord = options.get<size_t, Symbol>("ncols_ord");
159
+ auto nrows = options.get<size_t, Symbol>("nrows");
160
+ auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
161
+ auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
162
+ auto ncols_ord = options.get<size_t, Symbol>("ncols_ord");
176
163
 
177
- double *restrict numeric_data = NULL;
164
+ double* numeric_data = nullptr;
178
165
  if (ncols_numeric > 0) {
179
- numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
166
+ numeric_data = reinterpret_cast<double*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
180
167
  }
181
168
 
182
- int *restrict categorical_data = NULL;
183
- int *restrict ncat = NULL;
169
+ int* categorical_data = nullptr;
170
+ int* ncat = nullptr;
184
171
  if (ncols_categ > 0) {
185
- categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
186
- ncat = (int*) options.get<String, Symbol>("ncat").c_str();
172
+ categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
173
+ ncat = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat").c_str()));
187
174
  }
188
175
 
189
- int *restrict ordinal_data = NULL;
190
- int *restrict ncat_ord = NULL;
176
+ int* ordinal_data = nullptr;
177
+ int* ncat_ord = nullptr;
191
178
  if (ncols_ord > 0) {
192
- ordinal_data = (int*) options.get<String, Symbol>("ordinal_data").c_str();
193
- ncat_ord = (int*) options.get<String, Symbol>("ncat_ord").c_str();
179
+ ordinal_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ordinal_data").c_str()));
180
+ ncat_ord = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat_ord").c_str()));
194
181
  }
195
182
 
196
183
  // options
197
- char *restrict cols_ignore = NULL;
198
- int nthreads = options.get<int, Symbol>("nthreads");
199
- bool categ_as_bin = options.get<bool, Symbol>("categ_as_bin");
200
- bool ord_as_bin = options.get<bool, Symbol>("ord_as_bin");
201
- bool cat_bruteforce_subset = options.get<bool, Symbol>("cat_bruteforce_subset");
202
- bool categ_from_maj = options.get<bool, Symbol>("categ_from_maj");
203
- bool take_mid = options.get<bool, Symbol>("take_mid");
204
- size_t max_depth = options.get<size_t, Symbol>("max_depth");
205
- double max_perc_outliers = options.get<double, Symbol>("pct_outliers");
206
- size_t min_size_numeric = options.get<size_t, Symbol>("min_size_numeric");
207
- size_t min_size_categ = options.get<size_t, Symbol>("min_size_categ");
208
- double min_gain = options.get<double, Symbol>("min_gain");
209
- bool gain_as_pct = options.get<bool, Symbol>("gain_as_pct");
210
- bool follow_all = options.get<bool, Symbol>("follow_all");
211
- double z_norm = options.get<double, Symbol>("z_norm");
212
- double z_outlier = options.get<double, Symbol>("z_outlier");
184
+ char* cols_ignore = nullptr;
185
+ auto nthreads = options.get<int, Symbol>("nthreads");
186
+ auto categ_as_bin = options.get<bool, Symbol>("categ_as_bin");
187
+ auto ord_as_bin = options.get<bool, Symbol>("ord_as_bin");
188
+ auto cat_bruteforce_subset = options.get<bool, Symbol>("cat_bruteforce_subset");
189
+ auto categ_from_maj = options.get<bool, Symbol>("categ_from_maj");
190
+ auto take_mid = options.get<bool, Symbol>("take_mid");
191
+ auto max_depth = options.get<size_t, Symbol>("max_depth");
192
+ auto max_perc_outliers = options.get<double, Symbol>("pct_outliers");
193
+ auto min_size_numeric = options.get<size_t, Symbol>("min_size_numeric");
194
+ auto min_size_categ = options.get<size_t, Symbol>("min_size_categ");
195
+ auto min_gain = options.get<double, Symbol>("min_gain");
196
+ auto gain_as_pct = options.get<bool, Symbol>("gain_as_pct");
197
+ auto follow_all = options.get<bool, Symbol>("follow_all");
198
+ auto z_norm = options.get<double, Symbol>("z_norm");
199
+ auto z_outlier = options.get<double, Symbol>("z_outlier");
213
200
 
214
201
  fit_outliers_models(
215
202
  model_outputs,
@@ -245,28 +232,28 @@ void Init_ext()
245
232
  "find_new_outliers",
246
233
  [](ModelOutputs& model_outputs, Hash options) {
247
234
  // data
248
- size_t nrows = options.get<size_t, Symbol>("nrows");
249
- size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
250
- size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
251
- size_t ncols_ord = options.get<size_t, Symbol>("ncols_ord");
235
+ auto nrows = options.get<size_t, Symbol>("nrows");
236
+ auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
237
+ auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
238
+ auto ncols_ord = options.get<size_t, Symbol>("ncols_ord");
252
239
 
253
- double *restrict numeric_data = NULL;
240
+ double* numeric_data = nullptr;
254
241
  if (ncols_numeric > 0) {
255
- numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
242
+ numeric_data = reinterpret_cast<double*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
256
243
  }
257
244
 
258
- int *restrict categorical_data = NULL;
245
+ int* categorical_data = nullptr;
259
246
  if (ncols_categ > 0) {
260
- categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
247
+ categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
261
248
  }
262
249
 
263
- int *restrict ordinal_data = NULL;
250
+ int* ordinal_data = nullptr;
264
251
  if (ncols_ord > 0) {
265
- ordinal_data = (int*) options.get<String, Symbol>("ordinal_data").c_str();
252
+ ordinal_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ordinal_data").c_str()));
266
253
  }
267
254
 
268
255
  // options
269
- int nthreads = options.get<int, Symbol>("nthreads");
256
+ auto nthreads = options.get<int, Symbol>("nthreads");
270
257
 
271
258
  find_new_outliers(
272
259
  numeric_data,
@@ -1,11 +1,20 @@
1
1
  module OutlierTree
2
2
  class Model
3
3
  def initialize(
4
- max_depth: 4, min_gain: 0.01, z_norm: 2.67, z_outlier: 8.0, pct_outliers: 0.01,
5
- min_size_numeric: 25, min_size_categ: 50, categ_split: "binarize", categ_outliers: "tail",
6
- numeric_split: "raw", follow_all: false, gain_as_pct: true, nthreads: -1
4
+ max_depth: 4,
5
+ min_gain: 0.01,
6
+ z_norm: 2.67,
7
+ z_outlier: 8.0,
8
+ pct_outliers: 0.01,
9
+ min_size_numeric: 25,
10
+ min_size_categ: 50,
11
+ categ_split: "binarize",
12
+ categ_outliers: "tail",
13
+ numeric_split: "raw",
14
+ follow_all: false,
15
+ gain_as_pct: true,
16
+ nthreads: -1
7
17
  )
8
-
9
18
  # TODO validate values
10
19
  @max_depth = max_depth
11
20
  @min_gain = min_gain
@@ -1,3 +1,3 @@
1
1
  module OutlierTree
2
- VERSION = "0.4.1"
2
+ VERSION = "0.5.0"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: outliertree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.1
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
@@ -65,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '3.1'
68
+ version: '3.3'
69
69
  required_rubygems_version: !ruby/object:Gem::Requirement
70
70
  requirements:
71
71
  - - ">="
72
72
  - !ruby/object:Gem::Version
73
73
  version: '0'
74
74
  requirements: []
75
- rubygems_version: 3.6.7
75
+ rubygems_version: 4.0.6
76
76
  specification_version: 4
77
77
  summary: Explainable outlier/anomaly detection for Ruby
78
78
  test_files: []