rumale 0.22.5 → 0.23.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +21 -0
- data/LICENSE.txt +1 -1
- data/README.md +34 -2
- data/ext/rumale/extconf.rb +1 -1
- data/ext/rumale/{tree.c → rumaleext.c} +51 -85
- data/ext/rumale/{tree.h → rumaleext.h} +5 -5
- data/lib/rumale/clustering/hdbscan.rb +28 -8
- data/lib/rumale/clustering/single_linkage.rb +23 -5
- data/lib/rumale/decomposition/fast_ica.rb +1 -1
- data/lib/rumale/ensemble/gradient_boosting_classifier.rb +2 -2
- data/lib/rumale/ensemble/gradient_boosting_regressor.rb +1 -1
- data/lib/rumale/ensemble/random_forest_classifier.rb +1 -1
- data/lib/rumale/ensemble/random_forest_regressor.rb +1 -1
- data/lib/rumale/evaluation_measure/roc_auc.rb +1 -2
- data/lib/rumale/kernel_approximation/nystroem.rb +1 -1
- data/lib/rumale/kernel_machine/kernel_svc.rb +1 -1
- data/lib/rumale/linear_model/linear_regression.rb +5 -3
- data/lib/rumale/linear_model/ridge.rb +3 -3
- data/lib/rumale/model_selection/grid_search_cv.rb +3 -3
- data/lib/rumale/naive_bayes/bernoulli_nb.rb +1 -1
- data/lib/rumale/naive_bayes/gaussian_nb.rb +1 -1
- data/lib/rumale/naive_bayes/multinomial_nb.rb +1 -1
- data/lib/rumale/nearest_neighbors/vp_tree.rb +2 -0
- data/lib/rumale/tree/base_decision_tree.rb +15 -10
- data/lib/rumale/tree/decision_tree_classifier.rb +14 -11
- data/lib/rumale/tree/decision_tree_regressor.rb +0 -1
- data/lib/rumale/tree/gradient_tree_regressor.rb +15 -11
- data/lib/rumale/version.rb +1 -1
- data/lib/rumale.rb +132 -133
- metadata +6 -17
- data/.coveralls.yml +0 -1
- data/.github/workflows/build.yml +0 -26
- data/.github/workflows/coverage.yml +0 -28
- data/.gitignore +0 -23
- data/.rspec +0 -3
- data/.rubocop.yml +0 -93
- data/.travis.yml +0 -17
- data/Gemfile +0 -17
- data/Rakefile +0 -14
- data/ext/rumale/rumale.c +0 -10
- data/ext/rumale/rumale.h +0 -8
- data/rumale.gemspec +0 -49
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4564c37af7744bc4fe14dec5c5fc1e236687c3a241d2e17ef2d89f1c57056af9
+  data.tar.gz: 6f70d79a10b890bbd127f60f1c7f26934fcd88f71458af8839ac049b7a07efc8
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5671a08ac8e9881f51896c4478ce5f4b54457c83d9b7194623febfd1859123cda5947c0d344aa551686c2c964359e9bdbd5ad13e9c921d2a3393a76717c00093
+  data.tar.gz: bb022827e8ca9d939addb9cfdd9b5fa5b643cd56150a84f41a224dde0c75992badbf792f77d06194943f015572beb5bafdd3c84e43efd98be5cc53beb9347ab0
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,24 @@
+# 0.23.2
+Rumale project will be rebooted on version 0.24.0.
+This version is probably the last release of the series starting with version 0.8.0.
+
+- Refactor some codes and configs.
+- Deprecate VPTree class.
+
+# 0.23.1
+- Fix all estimators to return inference results in a contiguous narray.
+- Fix to use until statement instead of recursive call on apply methods of tree estimators.
+- Rename native extension files.
+- Introduce clang-format for native extension codes.
+
+# 0.23.0
+## Breaking change
+- Change automalically selected solver from sgd to lbfgs in
+  [LinearRegression](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/LinearRegression.html) and
+  [Ridge](https://yoshoku.github.io/rumale/doc/Rumale/LinearModel/Ridge.html).
+- When given 'auto' to solver parameter, these estimator select the 'svd' solver if Numo::Linalg is loaded.
+  Otherwise, they select the 'lbfgs' solver.
+
 # 0.22.5
 - Add transformer class for calculating kernel matrix.
   - [KernelCalculator](https://yoshoku.github.io/rumale/doc/Rumale/Preprocessing/KernelCalculator.html)
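The 0.23.0 entry above describes how the 'auto' solver is now resolved in LinearRegression and Ridge. Below is a minimal sketch of that behavior using the public Rumale API; the data is made up and the final `params[:solver]` check is illustrative rather than guaranteed by the changelog.

```ruby
require 'rumale'

# Optional: loading Numo::Linalg first is what switches 'auto' from 'lbfgs' to 'svd'.
begin
  require 'numo/linalg/autoloader'
rescue LoadError
  # Without Numo::Linalg, 'auto' should fall back to the 'lbfgs' solver.
end

x = Numo::DFloat.new(100, 3).rand
y = x.dot(Numo::DFloat[2.0, -1.0, 0.5]) + 0.1

model = Rumale::LinearModel::Ridge.new(solver: 'auto', reg_param: 0.1)
model.fit(x, y)
puts model.params[:solver] # expect 'svd' with Numo::Linalg loaded, 'lbfgs' otherwise
```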
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -1,9 +1,10 @@
 # Rumale
 
+**This project is suspended for the author's health reasons. It will be resumed when the author recovers.**
+
 
 
-[](https://github.com/yoshoku/rumale/actions
-[](https://coveralls.io/github/yoshoku/rumale?branch=main)
+[](https://github.com/yoshoku/rumale/actions/workflows/build.yml)
 [](https://badge.fury.io/rb/rumale)
 [](https://github.com/yoshoku/rumale/blob/main/LICENSE.txt)
 [](https://yoshoku.github.io/rumale/doc/)
@@ -189,6 +190,12 @@ Ubuntu:
 $ sudo apt-get install libopenblas-dev liblapacke-dev
 ```
 
+Fedora:
+
+```bash
+$ sudo dnf install openblas-devel lapack-devel
+```
+
 Windows (MSYS2):
 
 ```bash
@@ -226,6 +233,12 @@ Ubuntu:
 $ sudo apt-get install gcc gfortran make
 ```
 
+Fedora:
+
+```bash
+$ sudo dnf install gcc gcc-gfortran make
+```
+
 Install Numo::OpenBLAS gem.
 
 ```bash
@@ -239,6 +252,25 @@ require 'numo/openblas'
 require 'rumale'
 ```
 
+### Numo::BLIS
+[Numo::BLIS](https://github.com/yoshoku/numo-blis) downloads and builds BLIS during installation
+and uses that as a background library for Numo::Linalg.
+BLIS is one of the high-performance BLAS as with OpenBLAS,
+and using that can be expected to speed up of processing in Rumale.
+
+Install Numo::BLIS gem.
+
+```bash
+$ gem install numo-blis
+```
+
+Load Numo::BLIS gem instead of Numo::Linalg.
+
+```ruby
+require 'numo/blis'
+require 'rumale'
+```
+
 ### Parallel
 Several estimators in Rumale support parallel processing.
 Parallel processing in Rumale is realized by [Parallel](https://github.com/grosser/parallel) gem,
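The README additions above follow one pattern: load a BLAS-backed Numo::Linalg provider (Numo::OpenBLAS or Numo::BLIS) before requiring rumale. A small hedged check of which backend actually ended up active; the `defined?` probe is an illustration, not part of Rumale's API.

```ruby
# Pick one backend, as the README suggests; the rescue keeps the script usable without it.
begin
  require 'numo/blis' # or: require 'numo/openblas'
rescue LoadError
  warn 'No BLAS-backed gem found; Rumale will run without Numo::Linalg acceleration.'
end
require 'rumale'

puts defined?(Numo::Linalg) ? 'Numo::Linalg is available' : 'running without Numo::Linalg'
```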
data/ext/rumale/extconf.rb
CHANGED
data/ext/rumale/{tree.c → rumaleext.c}
CHANGED
@@ -1,18 +1,12 @@
-#include "
+#include "rumaleext.h"
 
-
-
-double*
-alloc_dbl_array(const long n_dimensions)
-{
+double* alloc_dbl_array(const long n_dimensions) {
   double* arr = ALLOC_N(double, n_dimensions);
   memset(arr, 0, n_dimensions * sizeof(double));
   return arr;
 }
 
-double
-calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
-{
+double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
   long i;
   double el;
   double gini = 0.0;
@@ -25,9 +19,7 @@ calc_gini_coef(double* histogram, const long n_elements, const long n_classes)
   return 1.0 - gini;
 }
 
-double
-calc_entropy(double* histogram, const long n_elements, const long n_classes)
-{
+double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
   long i;
   double el;
   double entropy = 0.0;
@@ -41,8 +33,7 @@ calc_entropy(double* histogram, const long n_elements, const long n_classes)
 }
 
 VALUE
-calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
-{
+calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
   long i;
   VALUE mean_vec = rb_ary_new2(n_dimensions);
 
@@ -53,9 +44,7 @@ calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements)
   return mean_vec;
 }
 
-double
-calc_vec_mae(VALUE vec_a, VALUE vec_b)
-{
+double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
   long i;
   const long n_dimensions = RARRAY_LEN(vec_a);
   double sum = 0.0;
@@ -69,9 +58,7 @@ calc_vec_mae(VALUE vec_a, VALUE vec_b)
   return sum / n_dimensions;
 }
 
-double
-calc_vec_mse(VALUE vec_a, VALUE vec_b)
-{
+double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
   long i;
   const long n_dimensions = RARRAY_LEN(vec_a);
   double sum = 0.0;
@@ -85,9 +72,7 @@ calc_vec_mse(VALUE vec_a, VALUE vec_b)
   return sum / n_dimensions;
 }
 
-double
-calc_mae(VALUE target_vecs, VALUE mean_vec)
-{
+double calc_mae(VALUE target_vecs, VALUE mean_vec) {
   long i;
   const long n_elements = RARRAY_LEN(target_vecs);
   double sum = 0.0;
@@ -99,9 +84,7 @@ calc_mae(VALUE target_vecs, VALUE mean_vec)
   return sum / n_elements;
 }
 
-double
-calc_mse(VALUE target_vecs, VALUE mean_vec)
-{
+double calc_mse(VALUE target_vecs, VALUE mean_vec) {
   long i;
   const long n_elements = RARRAY_LEN(target_vecs);
   double sum = 0.0;
@@ -113,18 +96,14 @@ calc_mse(VALUE target_vecs, VALUE mean_vec)
   return sum / n_elements;
 }
 
-double
-calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes)
-{
+double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
   if (strcmp(criterion, "entropy") == 0) {
     return calc_entropy(histogram, n_elements, n_classes);
   }
   return calc_gini_coef(histogram, n_elements, n_classes);
 }
 
-double
-calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
-{
+double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
   const long n_elements = RARRAY_LEN(target_vecs);
   const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
   VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
@@ -135,9 +114,7 @@ calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec)
   return calc_mse(target_vecs, mean_vec);
 }
 
-void
-add_sum_vec(double* sum_vec, VALUE target)
-{
+void add_sum_vec(double* sum_vec, VALUE target) {
   long i;
   const long n_dimensions = RARRAY_LEN(target);
 
@@ -146,9 +123,7 @@ add_sum_vec(double* sum_vec, VALUE target)
   }
 }
 
-void
-sub_sum_vec(double* sum_vec, VALUE target)
-{
+void sub_sum_vec(double* sum_vec, VALUE target) {
   long i;
   const long n_dimensions = RARRAY_LEN(target);
 
@@ -168,9 +143,7 @@ typedef struct {
 /**
  * @!visibility private
  */
-static void
-iter_find_split_params_cls(na_loop_t const* lp)
-{
+static void iter_find_split_params_cls(na_loop_t const* lp) {
   const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
   const double* f = (double*)NDL_PTR(lp, 1);
   const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
@@ -200,7 +173,9 @@ iter_find_split_params_cls(na_loop_t const* lp)
   params[3] = 0.0; /* gain */
 
   /* Initialize child node variables. */
-  for (i = 0; i < n_elements; i++) {
+  for (i = 0; i < n_elements; i++) {
+    r_histogram[y[o[i]]] += 1.0;
+  }
 
   /* Find optimal parameters. */
   while (curr_pos < n_elements && curr_el != last_el) {
@@ -246,14 +221,13 @@ iter_find_split_params_cls(na_loop_t const* lp)
  * @param n_classes [Integer] The number of classes.
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
  */
-static VALUE
-
-{
-
-
-
-
-  split_opts_cls opts = { StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity) };
+static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
+                                   VALUE n_classes) {
+  ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
+  size_t out_shape[1] = {4};
+  ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
+  ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
+  split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
   VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
   VALUE results = rb_ary_new2(4);
   double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -276,9 +250,7 @@ typedef struct {
 /**
  * @!visibility private
  */
-static void
-iter_find_split_params_reg(na_loop_t const* lp)
-{
+static void iter_find_split_params_reg(na_loop_t const* lp) {
   const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
   const double* f = (double*)NDL_PTR(lp, 1);
   const double* y = (double*)NDL_PTR(lp, 2);
@@ -367,14 +339,12 @@ iter_find_split_params_reg(na_loop_t const* lp)
  * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
  * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
  */
-static VALUE
-
-{
-
-
-
-  ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
-  split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
+static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
+  ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
+  size_t out_shape[1] = {4};
+  ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
+  ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
+  split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
   VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
   VALUE results = rb_ary_new2(4);
   double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -390,9 +360,7 @@ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
 /**
  * @!visibility private
  */
-static void
-iter_find_split_params_grad_reg(na_loop_t const* lp)
-{
+static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
   const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
   const double* f = (double*)NDL_PTR(lp, 1);
   const double* g = (double*)NDL_PTR(lp, 2);
@@ -427,15 +395,16 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
     /* Calculate gain of new split. */
     r_grad = s_grad - l_grad;
     r_hess = s_hess - l_hess;
-    gain = (l_grad * l_grad) / (l_hess + reg_lambda) +
-           (r_grad * r_grad) / (r_hess + reg_lambda) -
+    gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
            (s_grad * s_grad) / (s_hess + reg_lambda);
     /* Update optimal parameters. */
     if (gain > gain_max) {
       threshold = 0.5 * (curr_el + next_el);
       gain_max = gain;
     }
-    if (next_pos == n_elements)
+    if (next_pos == n_elements) {
+      break;
+    }
     curr_pos = next_pos;
     curr_el = f[o[curr_pos]];
   }
@@ -458,15 +427,13 @@ iter_find_split_params_grad_reg(na_loop_t const* lp)
  * @param reg_lambda [Float] The L2 regularization term on weight.
  * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
  */
-static VALUE
-
-
-{
-
-
-
-  ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout };
-  double opts[3] = { NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda) };
+static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
+                                        VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
+  ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
+  size_t out_shape[1] = {2};
+  ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
+  ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
+  double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
   VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
   VALUE results = rb_ary_new2(2);
   double* params_ptr = (double*)na_get_pointer_for_read(params);
@@ -488,9 +455,7 @@ find_split_params_grad_reg
  * @param n_classes_ [Integer] The number of classes.
  * @return [Float] impurity
  */
-static VALUE
-node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_)
-{
+static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
   long i;
   const long n_classes = NUM2LONG(n_classes_);
   const long n_elements = NUM2LONG(n_elements_);
@@ -498,7 +463,9 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
   double* histogram = alloc_dbl_array(n_classes);
   VALUE ret;
 
-  for (i = 0; i < n_elements; i++) {
+  for (i = 0; i < n_elements; i++) {
+    histogram[y[i]] += 1;
+  }
 
   ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
 
@@ -520,9 +487,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
  * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The taget values.
  * @return [Float] impurity
  */
-static VALUE
-node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
-{
+static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
   long i;
   const long n_elements = RARRAY_LEN(y);
   const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
@@ -546,9 +511,10 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
   return ret;
 }
 
-void
-
+void Init_rumaleext(void) {
+  VALUE mRumale = rb_define_module("Rumale");
   VALUE mTree = rb_define_module_under(mRumale, "Tree");
+
   /**
    * Document-module: Rumale::Tree::ExtDecisionTreeClassifier
    * @!visibility private

data/ext/rumale/{tree.h → rumaleext.h}
CHANGED
@@ -1,12 +1,12 @@
-#ifndef
-#define
+#ifndef RUMALEEXT_H
+#define RUMALEEXT_H 1
 
 #include <math.h>
 #include <string.h>
+
 #include <ruby.h>
+
 #include <numo/narray.h>
 #include <numo/template.h>
 
-
-
-#endif /* RUMALE_TREE_H */
+#endif /* RUMALEEXT_H */

data/lib/rumale/clustering/hdbscan.rb
CHANGED
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-require 'ostruct'
 require 'rumale/base/base_estimator'
 require 'rumale/base/cluster_analyzer'
 require 'rumale/pairwise_metric'
@@ -108,7 +107,28 @@ module Rumale
       end
     end
 
-
+    # @!visibility private
+    class Node
+      # @!visibility private
+      attr_reader :x, :y, :weight, :n_elements
+
+      # @!visibility private
+      def initialize(x:, y:, weight:, n_elements: 0)
+        @x = x
+        @y = y
+        @weight = weight
+        @n_elements = n_elements
+      end
+
+      # @!visibility private
+      def ==(other)
+        # :nocov:
+        x == other.x && y == other.y && weight == other.weight && n_elements == other.n_elements
+        # :nocov:
+      end
+    end
+
+    private_constant :UnionFind, :Node
 
     def partial_fit(distance_mat)
       mr_distance_mat = mutual_reachability_distances(distance_mat, @params[:min_samples])
@@ -161,30 +181,30 @@
 
         if n_x_elements >= min_cluster_size && n_y_elements >= min_cluster_size
           relabel[edge.x] = next_label
-          res.push(
+          res.push(Node.new(x: relabel[n_id], y: relabel[edge.x], weight: density, n_elements: n_x_elements))
           next_label += 1
           relabel[edge.y] = next_label
-          res.push(
+          res.push(Node.new(x: relabel[n_id], y: relabel[edge.y], weight: density, n_elements: n_y_elements))
           next_label += 1
        elsif n_x_elements < min_cluster_size && n_y_elements < min_cluster_size
          breadth_first_search_hierarchy(hierarchy, edge.x).each do |sn_id|
-            res.push(
+            res.push(Node.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
            visited[sn_id] = true
          end
          breadth_first_search_hierarchy(hierarchy, edge.y).each do |sn_id|
-            res.push(
+            res.push(Node.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
            visited[sn_id] = true
          end
        elsif n_x_elements < min_cluster_size
          relabel[edge.y] = relabel[n_id]
          breadth_first_search_hierarchy(hierarchy, edge.x).each do |sn_id|
-            res.push(
+            res.push(Node.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
            visited[sn_id] = true
          end
        elsif n_y_elements < min_cluster_size
          relabel[edge.x] = relabel[n_id]
          breadth_first_search_hierarchy(hierarchy, edge.y).each do |sn_id|
-            res.push(
+            res.push(Node.new(x: relabel[n_id], y: sn_id, weight: density, n_elements: 1)) if sn_id < n_points
            visited[sn_id] = true
          end
        end

data/lib/rumale/clustering/single_linkage.rb
CHANGED
@@ -1,6 +1,5 @@
 # frozen_string_literal: true
 
-require 'ostruct'
 require 'rumale/base/base_estimator'
 require 'rumale/base/cluster_analyzer'
 require 'rumale/pairwise_metric'
@@ -25,7 +24,7 @@ module Rumale
     attr_reader :labels
 
     # Return the hierarchical structure.
-    # @return [Array<
+    # @return [Array<SingleLinkage::Node>] (shape: [n_samples - 1])
     attr_reader :hierarchy
 
     # Create a new cluster analyzer with single linkage algorithm.
@@ -104,7 +103,26 @@ module Rumale
       end
     end
 
-
+    # @!visibility private
+    class Node
+      # @!visibility private
+      attr_reader :x, :y, :weight, :n_elements
+
+      # @!visibility private
+      def initialize(x:, y:, weight:, n_elements: 0)
+        @x = x
+        @y = y
+        @weight = weight
+        @n_elements = n_elements
+      end
+
+      # @!visibility private
+      def ==(other)
+        x == other.x && y == other.y && weight == other.weight && n_elements == other.n_elements
+      end
+    end
+
+    private_constant :UnionFind, :Node
 
     def partial_fit(distance_mat)
       mst = minimum_spanning_tree(distance_mat)
@@ -125,7 +143,7 @@ module Rumale
         curr_weights = Numo::DFloat.minimum(curr_weights[target], complete_graph[curr_node, curr_labels])
         next_node = curr_labels[curr_weights.min_index]
         weight = curr_weights.min
-
+        Node.new(x: curr_node, y: next_node, weight: weight)
       end
       mst.sort! { |a, b| a.weight <=> b.weight }
     end
@@ -140,7 +158,7 @@ module Rumale
        x_root, y_root = [y_root, x_root] unless x_root < y_root
        weight = mst[n].weight
        n_samples = uf.union(x_root, y_root)
-
+       Node.new(x: x_root, y: y_root, weight: weight, n_elements: n_samples)
      end
    end

data/lib/rumale/decomposition/fast_ica.rb
CHANGED
@@ -81,7 +81,7 @@ module Rumale
       wx = @params[:whiten] ? (x - @mean).dot(whiten_mat.transpose) : x
       unmixing, @n_iter = ica(wx, @params[:fun], @params[:max_iter], @params[:tol], @rng.dup)
       @components = @params[:whiten] ? unmixing.dot(whiten_mat) : unmixing
-      @mixing = Numo::Linalg.pinv(@components)
+      @mixing = Numo::Linalg.pinv(@components).dup
       if @params[:n_components] == 1
         @components = @components.flatten.dup
         @mixing = @mixing.flatten.dup

data/lib/rumale/ensemble/gradient_boosting_classifier.rb
CHANGED
@@ -161,7 +161,7 @@ module Rumale
 
       proba = 1.0 / (Numo::NMath.exp(-decision_function(x)) + 1.0)
 
-      return (proba.transpose / proba.sum(axis: 1)).transpose if @classes.size > 2
+      return (proba.transpose / proba.sum(axis: 1)).transpose.dup if @classes.size > 2
 
       n_samples, = x.shape
       probs = Numo::DFloat.zeros(n_samples, 2)
@@ -182,7 +182,7 @@ module Rumale
       else
         @estimators.map { |tree| tree.apply(x) }
       end
-      Numo::Int32[*leaf_ids].transpose
+      Numo::Int32[*leaf_ids].transpose.dup
     end
 
     private

data/lib/rumale/ensemble/random_forest_classifier.rb
CHANGED
@@ -159,7 +159,7 @@ module Rumale
     # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
     def apply(x)
       x = check_convert_sample_array(x)
-      Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
+      Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
     end
 
     private

data/lib/rumale/ensemble/random_forest_regressor.rb
CHANGED
@@ -136,7 +136,7 @@ module Rumale
     # @return [Numo::Int32] (shape: [n_samples, n_estimators]) Leaf index for sample.
     def apply(x)
       x = check_convert_sample_array(x)
-      Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose
+      Numo::Int32[*Array.new(@params[:n_estimators]) { |n| @estimators[n].apply(x) }].transpose.dup
     end
 
     private

data/lib/rumale/evaluation_measure/roc_auc.rb
CHANGED
@@ -118,8 +118,7 @@ module Rumale
       desc_y_true = Numo::Int32.cast(bin_y_true[desc_pred_ids])
       desc_y_score = y_score[desc_pred_ids]
 
-
-      threshold_ids = dist_value_ids.append(desc_y_true.size - 1)
+      threshold_ids = Numo::Int32.cast(desc_y_score.diff.ne(0).where.to_a.append(desc_y_true.size - 1))
 
       true_pos = desc_y_true.cumsum[threshold_ids]
       false_pos = 1 + threshold_ids - true_pos

data/lib/rumale/kernel_approximation/nystroem.rb
CHANGED
@@ -73,7 +73,7 @@ module Rumale
 
       # random sampling.
       @component_indices = Numo::Int32.cast(Array(0...n_samples).shuffle(random: sub_rng)[0...n_components])
-      @components = x[@component_indices, true]
+      @components = x[@component_indices, true].dup
 
       # calculate normalizing factor.
       kernel_mat = kernel_mat(@components)

data/lib/rumale/kernel_machine/kernel_svc.rb
CHANGED
@@ -152,7 +152,7 @@ module Rumale
 
       if @classes.size > 2
         probs = 1.0 / (Numo::NMath.exp(@prob_param[true, 0] * decision_function(x) + @prob_param[true, 1]) + 1.0)
-        return (probs.transpose / probs.sum(axis: 1)).transpose
+        return (probs.transpose / probs.sum(axis: 1)).transpose.dup
       end
 
       n_samples, = x.shape