outliertree 0.4.0 → 0.4.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 107a39daf1b8743880c65c0c9bd20f6b2430687a843aa3394e4f57ba38b58766
4
- data.tar.gz: 81e5e13612dd119624a6ec12652b048002c0c2103ee6389709682fb6bcb27e5e
3
+ metadata.gz: fd25a8154076c68420d1908a6dcb29cf0b9d1fec972cbc24063c3c4cf6de63e4
4
+ data.tar.gz: a834dcb5791ee8083d7ae8721cb22abfb468e123ae78940143a33df0d2ef0f98
5
5
  SHA512:
6
- metadata.gz: 2a8c6276389a465d548b7b06e7933e64094059960301b4393015bd906dd8deed361887876c152017bc2427fe54b81271e076de24f3e1df801f8f0c330a6c0f76
7
- data.tar.gz: 27b9eb4c42adc7abf6c905ec3c787f6947aae6475ecb37283c9b00e560ebb49a8a6bd7ebacfce2c636ba289f014b6dd87821d65311cd3a8640700a4dae44464d
6
+ metadata.gz: 0ab3d03adf97e689d188a91ceb86b77fbe69bbc45ca64426f84ae93360df2cdd395f5c1b66d7d8f371d1212c4a615c2466bdb8d041ef60d200d092967bbd3341
7
+ data.tar.gz: eb60aee1c0bb479db304491c804209e9c3ea51a5d089c5c05b90f1490c0190339b7a8f7c8b64c20b82ffafa7853f8f3cce2493647d95f0d86cbc890df4226987
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.4.2 (2025-10-26)
2
+
3
+ - Fixed error with Rice 4.7
4
+
5
+ ## 0.4.1 (2025-04-23)
6
+
7
+ - Updated OutlierTree to 1.10.0
8
+
1
9
  ## 0.4.0 (2024-06-11)
2
10
 
3
11
  - Updated OutlierTree to 1.9.0
@@ -1,73 +1,83 @@
1
+ #include <complex>
2
+ #include <vector>
3
+
1
4
  // outliertree
2
5
  #include <outlier_tree.hpp>
3
6
 
7
+ // fix warning
8
+ #undef restrict
9
+
4
10
  // rice
5
11
  #include <rice/rice.hpp>
6
12
  #include <rice/stl.hpp>
7
13
 
8
14
  using Rice::Array;
9
15
  using Rice::Hash;
10
- using Rice::Module;
11
16
  using Rice::Object;
12
17
  using Rice::String;
13
18
  using Rice::Symbol;
14
- using Rice::define_class_under;
15
- using Rice::define_module;
16
19
 
17
- namespace Rice::detail
18
- {
20
+ namespace Rice::detail {
19
21
  template<typename T>
20
- class To_Ruby<std::vector<T>>
21
- {
22
+ class To_Ruby<std::vector<T>> {
22
23
  public:
23
- VALUE convert(std::vector<T> const & x)
24
- {
24
+ To_Ruby() = default;
25
+
26
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
27
+
28
+ VALUE convert(std::vector<T> const & x) {
25
29
  auto a = rb_ary_new2(x.size());
26
30
  for (const auto& v : x) {
27
31
  rb_ary_push(a, To_Ruby<T>().convert(v));
28
32
  }
29
33
  return a;
30
34
  }
35
+
36
+ private:
37
+ Arg* arg_ = nullptr;
31
38
  };
32
39
 
33
40
  template<>
34
- class To_Ruby<std::vector<signed char>>
35
- {
41
+ class To_Ruby<std::vector<signed char>> {
36
42
  public:
37
- VALUE convert(std::vector<signed char> const & x)
38
- {
43
+ To_Ruby() = default;
44
+
45
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
46
+
47
+ VALUE convert(std::vector<signed char> const & x) {
39
48
  auto a = rb_ary_new2(x.size());
40
49
  for (const auto& v : x) {
41
50
  rb_ary_push(a, To_Ruby<signed char>().convert(v));
42
51
  }
43
52
  return a;
44
53
  }
54
+
55
+ private:
56
+ Arg* arg_ = nullptr;
45
57
  };
46
58
 
47
59
  template<>
48
- struct Type<std::vector<signed char>>
49
- {
50
- static bool verify()
51
- {
60
+ struct Type<std::vector<signed char>> {
61
+ static bool verify() {
52
62
  return true;
53
63
  }
54
64
  };
55
65
 
56
66
  template<>
57
- struct Type<ColType>
58
- {
59
- static bool verify()
60
- {
67
+ struct Type<ColType> {
68
+ static bool verify() {
61
69
  return true;
62
70
  }
63
71
  };
64
72
 
65
73
  template<>
66
- class To_Ruby<ColType>
67
- {
74
+ class To_Ruby<ColType> {
68
75
  public:
69
- VALUE convert(ColType const & x)
70
- {
76
+ To_Ruby() = default;
77
+
78
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
79
+
80
+ VALUE convert(ColType const & x) {
71
81
  switch (x) {
72
82
  case Numeric: return Symbol("numeric");
73
83
  case Categorical: return Symbol("categorical");
@@ -76,23 +86,26 @@ namespace Rice::detail
76
86
  }
77
87
  throw std::runtime_error("Unknown column type");
78
88
  }
89
+
90
+ private:
91
+ Arg* arg_ = nullptr;
79
92
  };
80
93
 
81
94
  template<>
82
- struct Type<SplitType>
83
- {
84
- static bool verify()
85
- {
95
+ struct Type<SplitType> {
96
+ static bool verify() {
86
97
  return true;
87
98
  }
88
99
  };
89
100
 
90
101
  template<>
91
- class To_Ruby<SplitType>
92
- {
102
+ class To_Ruby<SplitType> {
93
103
  public:
94
- VALUE convert(SplitType const & x)
95
- {
104
+ To_Ruby() = default;
105
+
106
+ explicit To_Ruby(Arg* arg) : arg_(arg) { }
107
+
108
+ VALUE convert(SplitType const & x) {
96
109
  switch (x) {
97
110
  case LessOrEqual: return Symbol("less_or_equal");
98
111
  case Greater: return Symbol("greater");
@@ -107,16 +120,18 @@ namespace Rice::detail
107
120
  }
108
121
  throw std::runtime_error("Unknown split type");
109
122
  }
123
+
124
+ private:
125
+ Arg* arg_ = nullptr;
110
126
  };
111
- }
127
+ } // namespace Rice::detail
112
128
 
113
129
  extern "C"
114
- void Init_ext()
115
- {
116
- Module rb_mOutlierTree = define_module("OutlierTree");
117
- Module rb_mExt = define_module_under(rb_mOutlierTree, "Ext");
130
+ void Init_ext() {
131
+ Rice::Module rb_mOutlierTree = Rice::define_module("OutlierTree");
132
+ Rice::Module rb_mExt = Rice::define_module_under(rb_mOutlierTree, "Ext");
118
133
 
119
- define_class_under<Cluster>(rb_mExt, "Cluster")
134
+ Rice::define_class_under<Cluster>(rb_mExt, "Cluster")
120
135
  .define_method("upper_lim", [](Cluster& self) { return self.upper_lim; })
121
136
  .define_method("display_lim_high", [](Cluster& self) { return self.display_lim_high; })
122
137
  .define_method("perc_below", [](Cluster& self) { return self.perc_below; })
@@ -133,7 +148,7 @@ void Init_ext()
133
148
  .define_method("has_na_branch", [](Cluster& self) { return self.has_NA_branch; })
134
149
  .define_method("col_num", [](Cluster& self) { return self.col_num; });
135
150
 
136
- define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
151
+ Rice::define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
137
152
  .define_method("parent_branch", [](ClusterTree& self) { return self.parent_branch; })
138
153
  .define_method("parent", [](ClusterTree& self) { return self.parent; })
139
154
  .define_method("all_branches", [](ClusterTree& self) { return self.all_branches; })
@@ -143,7 +158,7 @@ void Init_ext()
143
158
  .define_method("split_subset", [](ClusterTree& self) { return self.split_subset; })
144
159
  .define_method("split_lev", [](ClusterTree& self) { return self.split_lev; });
145
160
 
146
- define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
161
+ Rice::define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
147
162
  .define_method("outlier_scores_final", [](ModelOutputs& self) { return self.outlier_scores_final; })
148
163
  .define_method("outlier_columns_final", [](ModelOutputs& self) { return self.outlier_columns_final; })
149
164
  .define_method("outlier_clusters_final", [](ModelOutputs& self) { return self.outlier_clusters_final; })
@@ -1,11 +1,20 @@
1
1
  module OutlierTree
2
2
  class Model
3
3
  def initialize(
4
- max_depth: 4, min_gain: 0.01, z_norm: 2.67, z_outlier: 8.0, pct_outliers: 0.01,
5
- min_size_numeric: 25, min_size_categ: 50, categ_split: "binarize", categ_outliers: "tail",
6
- numeric_split: "raw", follow_all: false, gain_as_pct: true, nthreads: -1
4
+ max_depth: 4,
5
+ min_gain: 0.01,
6
+ z_norm: 2.67,
7
+ z_outlier: 8.0,
8
+ pct_outliers: 0.01,
9
+ min_size_numeric: 25,
10
+ min_size_categ: 50,
11
+ categ_split: "binarize",
12
+ categ_outliers: "tail",
13
+ numeric_split: "raw",
14
+ follow_all: false,
15
+ gain_as_pct: true,
16
+ nthreads: -1
7
17
  )
8
-
9
18
  # TODO validate values
10
19
  @max_depth = max_depth
11
20
  @min_gain = min_gain
@@ -80,7 +80,7 @@ module OutlierTree
80
80
  column: column,
81
81
  value: value,
82
82
  conditions: conditions,
83
- group_statistics: group_statistics,
83
+ group_statistics: group_statistics
84
84
  # leave out for simplicity
85
85
  # score: score,
86
86
  # tree_depth: model_outputs.outlier_depth_final[row],
@@ -128,7 +128,7 @@ module OutlierTree
128
128
  column: cond_col,
129
129
  comparison: colcond,
130
130
  to: condval,
131
- value: colval,
131
+ value: colval
132
132
  # leave out for simplicity
133
133
  # decimals: coldecim
134
134
  }
@@ -1,3 +1,3 @@
1
1
  module OutlierTree
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.2"
3
3
  end
@@ -173,8 +173,7 @@ bool define_numerical_cluster(double *restrict x, size_t *restrict ix_arr, size_
173
173
  if ((!isinf(left_tail) || !isinf(right_tail)) && !is_log_transf && !is_exp_transf) {
174
174
  sd *= 0.5;
175
175
  }
176
- while (std::numeric_limits<double>::epsilon() > sd*std::fmin(min_gap, z_norm))
177
- sd = std::nextafter(sd, std::numeric_limits<double>::infinity());
176
+ sd = std::fmax(sd, std::numeric_limits<double>::epsilon() / std::fmin(min_gap, z_norm));
178
177
  cluster.cluster_mean = mean;
179
178
  cluster.cluster_sd = sd;
180
179
  cnt = end - st + 1;
@@ -218,8 +217,8 @@ bool define_numerical_cluster(double *restrict x, size_t *restrict ix_arr, size_
218
217
  cluster.display_lim_low = orig_x[ix_arr[row + 1]];
219
218
  cluster.perc_above = (long double)(end - st_normals + 1) / (long double)(end - st + 1);
220
219
 
221
- while (cluster.display_lim_low <= cluster.lower_lim) {
222
- cluster.lower_lim = std::nextafter(cluster.lower_lim, -std::numeric_limits<double>::infinity());
220
+ if (cluster.display_lim_low <= cluster.lower_lim) {
221
+ cluster.lower_lim = std::nextafter(cluster.display_lim_low, -std::numeric_limits<double>::infinity());
223
222
  }
224
223
  break;
225
224
  }
@@ -292,8 +291,8 @@ bool define_numerical_cluster(double *restrict x, size_t *restrict ix_arr, size_
292
291
  }
293
292
 
294
293
  if (cluster.lower_lim > -HUGE_VAL) {
295
- while (cluster.lower_lim >= orig_x[ix_arr[st]]) {
296
- cluster.lower_lim = std::nextafter(cluster.lower_lim, -std::numeric_limits<double>::infinity());
294
+ if (cluster.lower_lim >= orig_x[ix_arr[st]]) {
295
+ cluster.lower_lim = std::nextafter(orig_x[ix_arr[st]], -std::numeric_limits<double>::infinity());
297
296
  }
298
297
  }
299
298
 
@@ -343,8 +342,8 @@ bool define_numerical_cluster(double *restrict x, size_t *restrict ix_arr, size_
343
342
  cluster.display_lim_high = orig_x[ix_arr[row - 1]];
344
343
  cluster.perc_below = (long double)(end_normals - st + 1) / (long double)(end - st + 1);
345
344
 
346
- while (cluster.display_lim_high >= cluster.upper_lim) {
347
- cluster.upper_lim = std::nextafter(cluster.upper_lim, std::numeric_limits<double>::infinity());
345
+ if (cluster.display_lim_high >= cluster.upper_lim) {
346
+ cluster.upper_lim = std::nextafter(cluster.display_lim_high, -std::numeric_limits<double>::infinity());
348
347
  }
349
348
  break;
350
349
  }
@@ -401,8 +400,8 @@ bool define_numerical_cluster(double *restrict x, size_t *restrict ix_arr, size_
401
400
  }
402
401
 
403
402
  if (cluster.upper_lim < HUGE_VAL) {
404
- while (cluster.upper_lim <= orig_x[ix_arr[end]]) {
405
- cluster.upper_lim = std::nextafter(cluster.upper_lim, std::numeric_limits<double>::infinity());
403
+ if (cluster.upper_lim <= orig_x[ix_arr[end]]) {
404
+ cluster.upper_lim = std::nextafter(orig_x[ix_arr[end]], std::numeric_limits<double>::infinity());
406
405
  }
407
406
  }
408
407
 
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: outliertree
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-06-12 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: rice
@@ -16,15 +15,14 @@ dependencies:
16
15
  requirements:
17
16
  - - ">="
18
17
  - !ruby/object:Gem::Version
19
- version: '4.3'
18
+ version: 4.3.3
20
19
  type: :runtime
21
20
  prerelease: false
22
21
  version_requirements: !ruby/object:Gem::Requirement
23
22
  requirements:
24
23
  - - ">="
25
24
  - !ruby/object:Gem::Version
26
- version: '4.3'
27
- description:
25
+ version: 4.3.3
28
26
  email: andrew@ankane.org
29
27
  executables: []
30
28
  extensions:
@@ -60,7 +58,6 @@ homepage: https://github.com/ankane/outliertree-ruby
60
58
  licenses:
61
59
  - GPL-3.0-or-later
62
60
  metadata: {}
63
- post_install_message:
64
61
  rdoc_options: []
65
62
  require_paths:
66
63
  - lib
@@ -75,8 +72,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
75
72
  - !ruby/object:Gem::Version
76
73
  version: '0'
77
74
  requirements: []
78
- rubygems_version: 3.5.9
79
- signing_key:
75
+ rubygems_version: 3.6.9
80
76
  specification_version: 4
81
77
  summary: Explainable outlier/anomaly detection for Ruby
82
78
  test_files: []