RubyGems - outliertree - Versions diffs - 0.4.1 → 0.5.0 - Mend

outliertree 0.4.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: b2c75c112439d30795757595ab3d34de3fbe80049f1b03e7168f0d1eed405417
-  data.tar.gz: e08ddb70bdf7b1be287b2986fc9364aa554db465fc2ef11b193b8f2adbd7af19
+  metadata.gz: ef5a7a99c49ab6a263bfac8293d85a914fb21794566245737640001315d34cc8
+  data.tar.gz: 29a5aec79b8e24912b422c279fed5ca58de62775296ee1aaad6af2b0fdbbad7a
 SHA512:
-  metadata.gz: 8bf35fb1fad7023c10b5b7514281d6db9c57faacfa8fcbaaa5e39a2aefb43508f5ed1c9c4cac7dd7e96052fac5e0c6036de201223ec3df7241051426e67a349c
-  data.tar.gz: 36d0ffa632d62be0f2ebe73bb7cace6b4f31eddf6202f26afe8250f27a8bbf25606be9ca8e9b5c89a2596b3acdc0c7124cf039d7b5e9557902a08ae233416293
+  metadata.gz: 4b67ff777a77cbd3b01617583deed6c2a033e28b9d7f57721d201e1adc5d72a3ec5ccaa6e4f41337dd607f594dd3a9a1de8ace4ffd55726bcf92b18631bc70cd
+  data.tar.gz: 020d5831ea8abedbcc25c130ea7ed2b009d9126ca64e61ecc27cf051cbe451ec5bd6ec01e0b606c7ff834d252119a19da80a199ecab1fab549caa5958f00ae88

data/CHANGELOG.md CHANGED

@@ -1,3 +1,11 @@
+## 0.5.0 (2026-04-07)
+- Dropped support for Ruby < 3.3
+## 0.4.2 (2025-10-26)
+- Fixed error with Rice 4.7
 ## 0.4.1 (2025-04-23)
 - Updated OutlierTree to 1.10.0

data/NOTICE.txt CHANGED

@@ -1,5 +1,5 @@
 Copyright (C) 2019-2020 David Cortes
-Copyright (C) 2020-2022 Andrew Kane
+Copyright (C) 2020-2026 Andrew Kane
 This program is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by

data/ext/outliertree/ext.cpp CHANGED

@@ -1,73 +1,55 @@
+#include <cstddef>
+#include <stdexcept>
+#include <vector>
 // outliertree
 #include <outlier_tree.hpp>
+// fix warning
+#undef restrict
 // rice
 #include <rice/rice.hpp>
-#include <rice/stl.hpp>
-using Rice::Array;
 using Rice::Hash;
-using Rice::Module;
-using Rice::Object;
 using Rice::String;
 using Rice::Symbol;
-using Rice::define_class_under;
-using Rice::define_module;
-namespace Rice::detail
-{
+namespace Rice::detail {
   template<typename T>
-  class To_Ruby<std::vector<T>>
-  {
+  class To_Ruby<std::vector<T>> {
   public:
-    VALUE convert(std::vector<T> const & x)
-    {
-      auto a = rb_ary_new2(x.size());
-      for (const auto& v : x) {
-        rb_ary_push(a, To_Ruby<T>().convert(v));
-      }
-      return a;
-    }
-  };
+    To_Ruby() = default;
-  template<>
-  class To_Ruby<std::vector<signed char>>
-  {
-  public:
-    VALUE convert(std::vector<signed char> const & x)
-    {
-      auto a = rb_ary_new2(x.size());
+    explicit To_Ruby(Arg* arg) : arg_(arg) { }
+    VALUE convert(const std::vector<T>& x) {
+      auto a = detail::protect(rb_ary_new2, x.size());
       for (const auto& v : x) {
-        rb_ary_push(a, To_Ruby<signed char>().convert(v));
+        detail::protect(rb_ary_push, a, To_Ruby<T>().convert(v));
       }
       return a;
     }
-  };
-  template<>
-  struct Type<std::vector<signed char>>
-  {
-    static bool verify()
-    {
-      return true;
-    }
+  private:
+    Arg* arg_ = nullptr;
   };
   template<>
-  struct Type<ColType>
-  {
-    static bool verify()
-    {
+  struct Type<ColType> {
+    static bool verify() {
       return true;
     }
   };
   template<>
-  class To_Ruby<ColType>
-  {
+  class To_Ruby<ColType> {
   public:
-    VALUE convert(ColType const & x)
-    {
+    To_Ruby() = default;
+    explicit To_Ruby(Arg* arg) : arg_(arg) { }
+    VALUE convert(ColType const & x) {
       switch (x) {
         case Numeric: return Symbol("numeric");
         case Categorical: return Symbol("categorical");
@@ -76,23 +58,26 @@ namespace Rice::detail
       }
       throw std::runtime_error("Unknown column type");
     }
+  private:
+    Arg* arg_ = nullptr;
   };
   template<>
-  struct Type<SplitType>
-  {
-    static bool verify()
-    {
+  struct Type<SplitType> {
+    static bool verify() {
       return true;
     }
   };
   template<>
-  class To_Ruby<SplitType>
-  {
+  class To_Ruby<SplitType> {
   public:
-    VALUE convert(SplitType const & x)
-    {
+    To_Ruby() = default;
+    explicit To_Ruby(Arg* arg) : arg_(arg) { }
+    VALUE convert(SplitType const & x) {
       switch (x) {
         case LessOrEqual: return Symbol("less_or_equal");
         case Greater: return Symbol("greater");
@@ -107,16 +92,18 @@ namespace Rice::detail
       }
       throw std::runtime_error("Unknown split type");
     }
+  private:
+    Arg* arg_ = nullptr;
   };
-}
+} // namespace Rice::detail
 extern "C"
-void Init_ext()
-{
-  Module rb_mOutlierTree = define_module("OutlierTree");
-  Module rb_mExt = define_module_under(rb_mOutlierTree, "Ext");
+void Init_ext() {
+  Rice::Module rb_mOutlierTree = Rice::define_module("OutlierTree");
+  Rice::Module rb_mExt = Rice::define_module_under(rb_mOutlierTree, "Ext");
-  define_class_under<Cluster>(rb_mExt, "Cluster")
+  Rice::define_class_under<Cluster>(rb_mExt, "Cluster")
     .define_method("upper_lim", [](Cluster& self) { return self.upper_lim; })
     .define_method("display_lim_high", [](Cluster& self) { return self.display_lim_high; })
     .define_method("perc_below", [](Cluster& self) { return self.perc_below; })
@@ -133,7 +120,7 @@ void Init_ext()
     .define_method("has_na_branch", [](Cluster& self) { return self.has_NA_branch; })
     .define_method("col_num", [](Cluster& self) { return self.col_num; });
-  define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
+  Rice::define_class_under<ClusterTree>(rb_mExt, "ClusterTree")
     .define_method("parent_branch", [](ClusterTree& self) { return self.parent_branch; })
     .define_method("parent", [](ClusterTree& self) { return self.parent; })
     .define_method("all_branches", [](ClusterTree& self) { return self.all_branches; })
@@ -143,7 +130,7 @@ void Init_ext()
     .define_method("split_subset", [](ClusterTree& self) { return self.split_subset; })
     .define_method("split_lev", [](ClusterTree& self) { return self.split_lev; });
-  define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
+  Rice::define_class_under<ModelOutputs>(rb_mExt, "ModelOutputs")
     .define_method("outlier_scores_final", [](ModelOutputs& self) { return self.outlier_scores_final; })
     .define_method("outlier_columns_final", [](ModelOutputs& self) { return self.outlier_columns_final; })
     .define_method("outlier_clusters_final", [](ModelOutputs& self) { return self.outlier_clusters_final; })
@@ -154,12 +141,12 @@ void Init_ext()
     .define_method(
       "all_clusters",
       [](ModelOutputs& self, size_t i, size_t j) {
-        return self.all_clusters[i][j];
+        return self.all_clusters.at(i).at(j);
       })
     .define_method(
       "all_trees",
       [](ModelOutputs& self, size_t i, size_t j) {
-        return self.all_trees[i][j];
+        return self.all_trees.at(i).at(j);
       });
   rb_mExt
@@ -169,47 +156,47 @@ void Init_ext()
         ModelOutputs model_outputs;
         // data
-        size_t nrows = options.get<size_t, Symbol>("nrows");
-        size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
-        size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
-        size_t ncols_ord = options.get<size_t, Symbol>("ncols_ord");
+        auto nrows = options.get<size_t, Symbol>("nrows");
+        auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
+        auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
+        auto ncols_ord = options.get<size_t, Symbol>("ncols_ord");
-        double *restrict numeric_data = NULL;
+        double* numeric_data = nullptr;
         if (ncols_numeric > 0) {
-          numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
+          numeric_data = reinterpret_cast<double*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
         }
-        int *restrict categorical_data = NULL;
-        int *restrict ncat = NULL;
+        int* categorical_data = nullptr;
+        int* ncat = nullptr;
         if (ncols_categ > 0) {
-          categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
-          ncat = (int*) options.get<String, Symbol>("ncat").c_str();
+          categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
+          ncat = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat").c_str()));
         }
-        int *restrict ordinal_data = NULL;
-        int *restrict ncat_ord = NULL;
+        int* ordinal_data = nullptr;
+        int* ncat_ord = nullptr;
         if (ncols_ord > 0) {
-          ordinal_data = (int*) options.get<String, Symbol>("ordinal_data").c_str();
-          ncat_ord = (int*) options.get<String, Symbol>("ncat_ord").c_str();
+          ordinal_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ordinal_data").c_str()));
+          ncat_ord = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ncat_ord").c_str()));
         }
         // options
-        char *restrict cols_ignore = NULL;
-        int nthreads = options.get<int, Symbol>("nthreads");
-        bool categ_as_bin = options.get<bool, Symbol>("categ_as_bin");
-        bool ord_as_bin = options.get<bool, Symbol>("ord_as_bin");
-        bool cat_bruteforce_subset = options.get<bool, Symbol>("cat_bruteforce_subset");
-        bool categ_from_maj = options.get<bool, Symbol>("categ_from_maj");
-        bool take_mid = options.get<bool, Symbol>("take_mid");
-        size_t max_depth = options.get<size_t, Symbol>("max_depth");
-        double max_perc_outliers = options.get<double, Symbol>("pct_outliers");
-        size_t min_size_numeric = options.get<size_t, Symbol>("min_size_numeric");
-        size_t min_size_categ = options.get<size_t, Symbol>("min_size_categ");
-        double min_gain = options.get<double, Symbol>("min_gain");
-        bool gain_as_pct = options.get<bool, Symbol>("gain_as_pct");
-        bool follow_all = options.get<bool, Symbol>("follow_all");
-        double z_norm = options.get<double, Symbol>("z_norm");
-        double z_outlier = options.get<double, Symbol>("z_outlier");
+        char* cols_ignore = nullptr;
+        auto nthreads = options.get<int, Symbol>("nthreads");
+        auto categ_as_bin = options.get<bool, Symbol>("categ_as_bin");
+        auto ord_as_bin = options.get<bool, Symbol>("ord_as_bin");
+        auto cat_bruteforce_subset = options.get<bool, Symbol>("cat_bruteforce_subset");
+        auto categ_from_maj = options.get<bool, Symbol>("categ_from_maj");
+        auto take_mid = options.get<bool, Symbol>("take_mid");
+        auto max_depth = options.get<size_t, Symbol>("max_depth");
+        auto max_perc_outliers = options.get<double, Symbol>("pct_outliers");
+        auto min_size_numeric = options.get<size_t, Symbol>("min_size_numeric");
+        auto min_size_categ = options.get<size_t, Symbol>("min_size_categ");
+        auto min_gain = options.get<double, Symbol>("min_gain");
+        auto gain_as_pct = options.get<bool, Symbol>("gain_as_pct");
+        auto follow_all = options.get<bool, Symbol>("follow_all");
+        auto z_norm = options.get<double, Symbol>("z_norm");
+        auto z_outlier = options.get<double, Symbol>("z_outlier");
         fit_outliers_models(
           model_outputs,
@@ -245,28 +232,28 @@ void Init_ext()
       "find_new_outliers",
       [](ModelOutputs& model_outputs, Hash options) {
         // data
-        size_t nrows = options.get<size_t, Symbol>("nrows");
-        size_t ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
-        size_t ncols_categ = options.get<size_t, Symbol>("ncols_categ");
-        size_t ncols_ord = options.get<size_t, Symbol>("ncols_ord");
+        auto nrows = options.get<size_t, Symbol>("nrows");
+        auto ncols_numeric = options.get<size_t, Symbol>("ncols_numeric");
+        auto ncols_categ = options.get<size_t, Symbol>("ncols_categ");
+        auto ncols_ord = options.get<size_t, Symbol>("ncols_ord");
-        double *restrict numeric_data = NULL;
+        double* numeric_data = nullptr;
         if (ncols_numeric > 0) {
-          numeric_data = (double*) options.get<String, Symbol>("numeric_data").c_str();
+          numeric_data = reinterpret_cast<double*>(const_cast<char*>(options.get<String, Symbol>("numeric_data").c_str()));
         }
-        int *restrict categorical_data = NULL;
+        int* categorical_data = nullptr;
         if (ncols_categ > 0) {
-          categorical_data = (int*) options.get<String, Symbol>("categorical_data").c_str();
+          categorical_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("categorical_data").c_str()));
         }
-        int *restrict ordinal_data = NULL;
+        int* ordinal_data = nullptr;
         if (ncols_ord > 0) {
-          ordinal_data = (int*) options.get<String, Symbol>("ordinal_data").c_str();
+          ordinal_data = reinterpret_cast<int*>(const_cast<char*>(options.get<String, Symbol>("ordinal_data").c_str()));
         }
         // options
-        int nthreads = options.get<int, Symbol>("nthreads");
+        auto nthreads = options.get<int, Symbol>("nthreads");
         find_new_outliers(
           numeric_data,

data/lib/outliertree/model.rb CHANGED

@@ -1,11 +1,20 @@
 module OutlierTree
   class Model
     def initialize(
-      max_depth: 4, min_gain: 0.01, z_norm: 2.67, z_outlier: 8.0, pct_outliers: 0.01,
-      min_size_numeric: 25, min_size_categ: 50, categ_split: "binarize", categ_outliers: "tail",
-      numeric_split: "raw", follow_all: false, gain_as_pct: true, nthreads: -1
+      max_depth: 4,
+      min_gain: 0.01,
+      z_norm: 2.67,
+      z_outlier: 8.0,
+      pct_outliers: 0.01,
+      min_size_numeric: 25,
+      min_size_categ: 50,
+      categ_split: "binarize",
+      categ_outliers: "tail",
+      numeric_split: "raw",
+      follow_all: false,
+      gain_as_pct: true,
+      nthreads: -1
     )
       # TODO validate values
       @max_depth = max_depth
       @min_gain = min_gain

data/lib/outliertree/version.rb CHANGED

@@ -1,3 +1,3 @@
 module OutlierTree
-  VERSION = "0.4.1"
+  VERSION = "0.5.0"
 end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: outliertree
 version: !ruby/object:Gem::Version
-  version: 0.4.1
+  version: 0.5.0
 platform: ruby
 authors:
 - Andrew Kane
@@ -65,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '3.1'
+      version: '3.3'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.6.7
+rubygems_version: 4.0.6
 specification_version: 4
 summary: Explainable outlier/anomaly detection for Ruby
 test_files: []