RubyGems - rumale - Versions diffs - 0.23.3 → 0.24.0 - Mend

rumale 0.23.3 → 0.24.0

Files changed (142)

checksums.yaml +4 -4
data/LICENSE.txt +5 -1
data/README.md +3 -288
data/lib/rumale/version.rb +1 -1
data/lib/rumale.rb +20 -131
metadata +252 -150
data/CHANGELOG.md +0 -643
data/CODE_OF_CONDUCT.md +0 -74
data/ext/rumale/extconf.rb +0 -37
data/ext/rumale/rumaleext.c +0 -545
data/ext/rumale/rumaleext.h +0 -12
data/lib/rumale/base/base_estimator.rb +0 -49
data/lib/rumale/base/classifier.rb +0 -36
data/lib/rumale/base/cluster_analyzer.rb +0 -31
data/lib/rumale/base/evaluator.rb +0 -17
data/lib/rumale/base/regressor.rb +0 -36
data/lib/rumale/base/splitter.rb +0 -21
data/lib/rumale/base/transformer.rb +0 -22
data/lib/rumale/clustering/dbscan.rb +0 -123
data/lib/rumale/clustering/gaussian_mixture.rb +0 -218
data/lib/rumale/clustering/hdbscan.rb +0 -291
data/lib/rumale/clustering/k_means.rb +0 -122
data/lib/rumale/clustering/k_medoids.rb +0 -141
data/lib/rumale/clustering/mini_batch_k_means.rb +0 -139
data/lib/rumale/clustering/power_iteration.rb +0 -127
data/lib/rumale/clustering/single_linkage.rb +0 -203
data/lib/rumale/clustering/snn.rb +0 -76
data/lib/rumale/clustering/spectral_clustering.rb +0 -115
data/lib/rumale/dataset.rb +0 -246
data/lib/rumale/decomposition/factor_analysis.rb +0 -150
data/lib/rumale/decomposition/fast_ica.rb +0 -188
data/lib/rumale/decomposition/nmf.rb +0 -124
data/lib/rumale/decomposition/pca.rb +0 -159
data/lib/rumale/ensemble/ada_boost_classifier.rb +0 -179
data/lib/rumale/ensemble/ada_boost_regressor.rb +0 -160
data/lib/rumale/ensemble/extra_trees_classifier.rb +0 -139
data/lib/rumale/ensemble/extra_trees_regressor.rb +0 -125
data/lib/rumale/ensemble/gradient_boosting_classifier.rb +0 -306
data/lib/rumale/ensemble/gradient_boosting_regressor.rb +0 -237
data/lib/rumale/ensemble/random_forest_classifier.rb +0 -189
data/lib/rumale/ensemble/random_forest_regressor.rb +0 -153
data/lib/rumale/ensemble/stacking_classifier.rb +0 -215
data/lib/rumale/ensemble/stacking_regressor.rb +0 -163
data/lib/rumale/ensemble/voting_classifier.rb +0 -126
data/lib/rumale/ensemble/voting_regressor.rb +0 -82
data/lib/rumale/evaluation_measure/accuracy.rb +0 -29
data/lib/rumale/evaluation_measure/adjusted_rand_score.rb +0 -74
data/lib/rumale/evaluation_measure/calinski_harabasz_score.rb +0 -56
data/lib/rumale/evaluation_measure/davies_bouldin_score.rb +0 -53
data/lib/rumale/evaluation_measure/explained_variance_score.rb +0 -39
data/lib/rumale/evaluation_measure/f_score.rb +0 -50
data/lib/rumale/evaluation_measure/function.rb +0 -147
data/lib/rumale/evaluation_measure/log_loss.rb +0 -45
data/lib/rumale/evaluation_measure/mean_absolute_error.rb +0 -29
data/lib/rumale/evaluation_measure/mean_squared_error.rb +0 -29
data/lib/rumale/evaluation_measure/mean_squared_log_error.rb +0 -29
data/lib/rumale/evaluation_measure/median_absolute_error.rb +0 -30
data/lib/rumale/evaluation_measure/mutual_information.rb +0 -49
data/lib/rumale/evaluation_measure/normalized_mutual_information.rb +0 -53
data/lib/rumale/evaluation_measure/precision.rb +0 -50
data/lib/rumale/evaluation_measure/precision_recall.rb +0 -96
data/lib/rumale/evaluation_measure/purity.rb +0 -40
data/lib/rumale/evaluation_measure/r2_score.rb +0 -43
data/lib/rumale/evaluation_measure/recall.rb +0 -50
data/lib/rumale/evaluation_measure/roc_auc.rb +0 -130
data/lib/rumale/evaluation_measure/silhouette_score.rb +0 -82
data/lib/rumale/feature_extraction/feature_hasher.rb +0 -110
data/lib/rumale/feature_extraction/hash_vectorizer.rb +0 -155
data/lib/rumale/feature_extraction/tfidf_transformer.rb +0 -113
data/lib/rumale/kernel_approximation/nystroem.rb +0 -126
data/lib/rumale/kernel_approximation/rbf.rb +0 -102
data/lib/rumale/kernel_machine/kernel_fda.rb +0 -120
data/lib/rumale/kernel_machine/kernel_pca.rb +0 -97
data/lib/rumale/kernel_machine/kernel_ridge.rb +0 -82
data/lib/rumale/kernel_machine/kernel_ridge_classifier.rb +0 -92
data/lib/rumale/kernel_machine/kernel_svc.rb +0 -193
data/lib/rumale/linear_model/base_sgd.rb +0 -285
data/lib/rumale/linear_model/elastic_net.rb +0 -119
data/lib/rumale/linear_model/lasso.rb +0 -115
data/lib/rumale/linear_model/linear_regression.rb +0 -201
data/lib/rumale/linear_model/logistic_regression.rb +0 -275
data/lib/rumale/linear_model/nnls.rb +0 -137
data/lib/rumale/linear_model/ridge.rb +0 -209
data/lib/rumale/linear_model/svc.rb +0 -213
data/lib/rumale/linear_model/svr.rb +0 -132
data/lib/rumale/manifold/mds.rb +0 -155
data/lib/rumale/manifold/tsne.rb +0 -222
data/lib/rumale/metric_learning/fisher_discriminant_analysis.rb +0 -113
data/lib/rumale/metric_learning/mlkr.rb +0 -161
data/lib/rumale/metric_learning/neighbourhood_component_analysis.rb +0 -167
data/lib/rumale/model_selection/cross_validation.rb +0 -125
data/lib/rumale/model_selection/function.rb +0 -42
data/lib/rumale/model_selection/grid_search_cv.rb +0 -225
data/lib/rumale/model_selection/group_k_fold.rb +0 -93
data/lib/rumale/model_selection/group_shuffle_split.rb +0 -115
data/lib/rumale/model_selection/k_fold.rb +0 -81
data/lib/rumale/model_selection/shuffle_split.rb +0 -90
data/lib/rumale/model_selection/stratified_k_fold.rb +0 -99
data/lib/rumale/model_selection/stratified_shuffle_split.rb +0 -118
data/lib/rumale/model_selection/time_series_split.rb +0 -91
data/lib/rumale/multiclass/one_vs_rest_classifier.rb +0 -83
data/lib/rumale/naive_bayes/base_naive_bayes.rb +0 -47
data/lib/rumale/naive_bayes/bernoulli_nb.rb +0 -82
data/lib/rumale/naive_bayes/complement_nb.rb +0 -85
data/lib/rumale/naive_bayes/gaussian_nb.rb +0 -69
data/lib/rumale/naive_bayes/multinomial_nb.rb +0 -74
data/lib/rumale/naive_bayes/negation_nb.rb +0 -71
data/lib/rumale/nearest_neighbors/k_neighbors_classifier.rb +0 -133
data/lib/rumale/nearest_neighbors/k_neighbors_regressor.rb +0 -108
data/lib/rumale/nearest_neighbors/vp_tree.rb +0 -132
data/lib/rumale/neural_network/adam.rb +0 -56
data/lib/rumale/neural_network/base_mlp.rb +0 -248
data/lib/rumale/neural_network/mlp_classifier.rb +0 -120
data/lib/rumale/neural_network/mlp_regressor.rb +0 -90
data/lib/rumale/pairwise_metric.rb +0 -152
data/lib/rumale/pipeline/feature_union.rb +0 -69
data/lib/rumale/pipeline/pipeline.rb +0 -175
data/lib/rumale/preprocessing/bin_discretizer.rb +0 -93
data/lib/rumale/preprocessing/binarizer.rb +0 -60
data/lib/rumale/preprocessing/kernel_calculator.rb +0 -92
data/lib/rumale/preprocessing/l1_normalizer.rb +0 -62
data/lib/rumale/preprocessing/l2_normalizer.rb +0 -63
data/lib/rumale/preprocessing/label_binarizer.rb +0 -89
data/lib/rumale/preprocessing/label_encoder.rb +0 -79
data/lib/rumale/preprocessing/max_abs_scaler.rb +0 -61
data/lib/rumale/preprocessing/max_normalizer.rb +0 -62
data/lib/rumale/preprocessing/min_max_scaler.rb +0 -76
data/lib/rumale/preprocessing/one_hot_encoder.rb +0 -100
data/lib/rumale/preprocessing/ordinal_encoder.rb +0 -109
data/lib/rumale/preprocessing/polynomial_features.rb +0 -109
data/lib/rumale/preprocessing/standard_scaler.rb +0 -71
data/lib/rumale/probabilistic_output.rb +0 -114
data/lib/rumale/tree/base_decision_tree.rb +0 -150
data/lib/rumale/tree/decision_tree_classifier.rb +0 -150
data/lib/rumale/tree/decision_tree_regressor.rb +0 -116
data/lib/rumale/tree/extra_tree_classifier.rb +0 -107
data/lib/rumale/tree/extra_tree_regressor.rb +0 -94
data/lib/rumale/tree/gradient_tree_regressor.rb +0 -202
data/lib/rumale/tree/node.rb +0 -39
data/lib/rumale/utils.rb +0 -42
data/lib/rumale/validation.rb +0 -128
data/lib/rumale/values.rb +0 -13

data/CODE_OF_CONDUCT.md DELETED Viewed

@@ -1,74 +0,0 @@
-# Contributor Covenant Code of Conduct
-## Our Pledge
-In the interest of fostering an open and welcoming environment, we as
-contributors and maintainers pledge to making participation in our project and
-our community a harassment-free experience for everyone, regardless of age, body
-size, disability, ethnicity, gender identity and expression, level of experience,
-nationality, personal appearance, race, religion, or sexual identity and
-orientation.
-## Our Standards
-Examples of behavior that contributes to creating a positive environment
-include:
-* Using welcoming and inclusive language
-* Being respectful of differing viewpoints and experiences
-* Gracefully accepting constructive criticism
-* Focusing on what is best for the community
-* Showing empathy towards other community members
-Examples of unacceptable behavior by participants include:
-* The use of sexualized language or imagery and unwelcome sexual attention or
-advances
-* Trolling, insulting/derogatory comments, and personal or political attacks
-* Public or private harassment
-* Publishing others' private information, such as a physical or electronic
-  address, without explicit permission
-* Other conduct which could reasonably be considered inappropriate in a
-  professional setting
-## Our Responsibilities
-Project maintainers are responsible for clarifying the standards of acceptable
-behavior and are expected to take appropriate and fair corrective action in
-response to any instances of unacceptable behavior.
-Project maintainers have the right and responsibility to remove, edit, or
-reject comments, commits, code, wiki edits, issues, and other contributions
-that are not aligned to this Code of Conduct, or to ban temporarily or
-permanently any contributor for other behaviors that they deem inappropriate,
-threatening, offensive, or harmful.
-## Scope
-This Code of Conduct applies both within project spaces and in public spaces
-when an individual is representing the project or its community. Examples of
-representing a project or community include using an official project e-mail
-address, posting via an official social media account, or acting as an appointed
-representative at an online or offline event. Representation of a project may be
-further defined and clarified by project maintainers.
-## Enforcement
-Instances of abusive, harassing, or otherwise unacceptable behavior may be
-reported by contacting the project team at yoshoku@outlook.com. All
-complaints will be reviewed and investigated and will result in a response that
-is deemed necessary and appropriate to the circumstances. The project team is
-obligated to maintain confidentiality with regard to the reporter of an incident.
-Further details of specific enforcement policies may be posted separately.
-Project maintainers who do not follow or enforce the Code of Conduct in good
-faith may face temporary or permanent repercussions as determined by other
-members of the project's leadership.
-## Attribution
-This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
-available at [http://contributor-covenant.org/version/1/4][version]
-[homepage]: http://contributor-covenant.org
-[version]: http://contributor-covenant.org/version/1/4/

data/ext/rumale/extconf.rb DELETED Viewed

@@ -1,37 +0,0 @@
-# frozen_string_literal: true
-require 'mkmf'
-require 'numo/narray'
-$LOAD_PATH.each do |lp|
-  if File.exist?(File.join(lp, 'numo/numo/narray.h'))
-    $INCFLAGS = "-I#{lp}/numo #{$INCFLAGS}"
-    break
-  end
-end
-unless have_header('numo/narray.h')
-  puts 'numo/narray.h not found.'
-  exit(1)
-end
-if RUBY_PLATFORM =~ /mswin|cygwin|mingw/
-  $LOAD_PATH.each do |lp|
-    if File.exist?(File.join(lp, 'numo/libnarray.a'))
-      $LDFLAGS = "-L#{lp}/numo #{$LDFLAGS}"
-      break
-    end
-  end
-  unless have_library('narray', 'nary_new')
-    puts 'libnarray.a not found.'
-    exit(1)
-  end
-end
-if RUBY_PLATFORM.match?(/darwin/) && Gem::Version.new('3.1.0') <= Gem::Version.new(RUBY_VERSION)
-  if try_link('int main(void){return 0;}', '-Wl,-undefined,dynamic_lookup')
-    $LDFLAGS << ' -Wl,-undefined,dynamic_lookup'
-  end
-end
-create_makefile('rumale/rumaleext')

data/ext/rumale/rumaleext.c DELETED Viewed

@@ -1,545 +0,0 @@
-#include "rumaleext.h"
-double* alloc_dbl_array(const long n_dimensions) {
-  double* arr = ALLOC_N(double, n_dimensions);
-  memset(arr, 0, n_dimensions * sizeof(double));
-  return arr;
-}
-double calc_gini_coef(double* histogram, const long n_elements, const long n_classes) {
-  long i;
-  double el;
-  double gini = 0.0;
-  for (i = 0; i < n_classes; i++) {
-    el = histogram[i] / n_elements;
-    gini += el * el;
-  }
-  return 1.0 - gini;
-}
-double calc_entropy(double* histogram, const long n_elements, const long n_classes) {
-  long i;
-  double el;
-  double entropy = 0.0;
-  for (i = 0; i < n_classes; i++) {
-    el = histogram[i] / n_elements;
-    entropy += el * log(el + 1.0);
-  }
-  return -entropy;
-}
-VALUE
-calc_mean_vec(double* sum_vec, const long n_dimensions, const long n_elements) {
-  long i;
-  VALUE mean_vec = rb_ary_new2(n_dimensions);
-  for (i = 0; i < n_dimensions; i++) {
-    rb_ary_store(mean_vec, i, DBL2NUM(sum_vec[i] / n_elements));
-  }
-  return mean_vec;
-}
-double calc_vec_mae(VALUE vec_a, VALUE vec_b) {
-  long i;
-  const long n_dimensions = RARRAY_LEN(vec_a);
-  double sum = 0.0;
-  double diff;
-  for (i = 0; i < n_dimensions; i++) {
-    diff = NUM2DBL(rb_ary_entry(vec_a, i)) - NUM2DBL(rb_ary_entry(vec_b, i));
-    sum += fabs(diff);
-  }
-  return sum / n_dimensions;
-}
-double calc_vec_mse(VALUE vec_a, VALUE vec_b) {
-  long i;
-  const long n_dimensions = RARRAY_LEN(vec_a);
-  double sum = 0.0;
-  double diff;
-  for (i = 0; i < n_dimensions; i++) {
-    diff = NUM2DBL(rb_ary_entry(vec_a, i)) - NUM2DBL(rb_ary_entry(vec_b, i));
-    sum += diff * diff;
-  }
-  return sum / n_dimensions;
-}
-double calc_mae(VALUE target_vecs, VALUE mean_vec) {
-  long i;
-  const long n_elements = RARRAY_LEN(target_vecs);
-  double sum = 0.0;
-  for (i = 0; i < n_elements; i++) {
-    sum += calc_vec_mae(rb_ary_entry(target_vecs, i), mean_vec);
-  }
-  return sum / n_elements;
-}
-double calc_mse(VALUE target_vecs, VALUE mean_vec) {
-  long i;
-  const long n_elements = RARRAY_LEN(target_vecs);
-  double sum = 0.0;
-  for (i = 0; i < n_elements; i++) {
-    sum += calc_vec_mse(rb_ary_entry(target_vecs, i), mean_vec);
-  }
-  return sum / n_elements;
-}
-double calc_impurity_cls(const char* criterion, double* histogram, const long n_elements, const long n_classes) {
-  if (strcmp(criterion, "entropy") == 0) {
-    return calc_entropy(histogram, n_elements, n_classes);
-  }
-  return calc_gini_coef(histogram, n_elements, n_classes);
-}
-double calc_impurity_reg(const char* criterion, VALUE target_vecs, double* sum_vec) {
-  const long n_elements = RARRAY_LEN(target_vecs);
-  const long n_dimensions = RARRAY_LEN(rb_ary_entry(target_vecs, 0));
-  VALUE mean_vec = calc_mean_vec(sum_vec, n_dimensions, n_elements);
-  if (strcmp(criterion, "mae") == 0) {
-    return calc_mae(target_vecs, mean_vec);
-  }
-  return calc_mse(target_vecs, mean_vec);
-}
-void add_sum_vec(double* sum_vec, VALUE target) {
-  long i;
-  const long n_dimensions = RARRAY_LEN(target);
-  for (i = 0; i < n_dimensions; i++) {
-    sum_vec[i] += NUM2DBL(rb_ary_entry(target, i));
-  }
-}
-void sub_sum_vec(double* sum_vec, VALUE target) {
-  long i;
-  const long n_dimensions = RARRAY_LEN(target);
-  for (i = 0; i < n_dimensions; i++) {
-    sum_vec[i] -= NUM2DBL(rb_ary_entry(target, i));
-  }
-}
-/**
- * @!visibility private
- */
-typedef struct {
-  char* criterion;
-  long n_classes;
-  double impurity;
-} split_opts_cls;
-/**
- * @!visibility private
- */
-static void iter_find_split_params_cls(na_loop_t const* lp) {
-  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
-  const double* f = (double*)NDL_PTR(lp, 1);
-  const int32_t* y = (int32_t*)NDL_PTR(lp, 2);
-  const long n_elements = NDL_SHAPE(lp, 0)[0];
-  const char* criterion = ((split_opts_cls*)lp->opt_ptr)->criterion;
-  const long n_classes = ((split_opts_cls*)lp->opt_ptr)->n_classes;
-  const double w_impurity = ((split_opts_cls*)lp->opt_ptr)->impurity;
-  double* params = (double*)NDL_PTR(lp, 3);
-  long i;
-  long curr_pos = 0;
-  long next_pos = 0;
-  long n_l_elements = 0;
-  long n_r_elements = n_elements;
-  double curr_el = f[o[0]];
-  double last_el = f[o[n_elements - 1]];
-  double next_el;
-  double l_impurity;
-  double r_impurity;
-  double gain;
-  double* l_histogram = alloc_dbl_array(n_classes);
-  double* r_histogram = alloc_dbl_array(n_classes);
-  /* Initialize optimal parameters. */
-  params[0] = 0.0;        /* left impurity */
-  params[1] = w_impurity; /* right impurity */
-  params[2] = curr_el;    /* threshold */
-  params[3] = 0.0;        /* gain */
-  /* Initialize child node variables. */
-  for (i = 0; i < n_elements; i++) {
-    r_histogram[y[o[i]]] += 1.0;
-  }
-  /* Find optimal parameters. */
-  while (curr_pos < n_elements && curr_el != last_el) {
-    next_el = f[o[next_pos]];
-    while (next_pos < n_elements && next_el == curr_el) {
-      l_histogram[y[o[next_pos]]] += 1;
-      n_l_elements++;
-      r_histogram[y[o[next_pos]]] -= 1;
-      n_r_elements--;
-      next_pos++;
-      next_el = f[o[next_pos]];
-    }
-    /* Calculate gain of new split. */
-    l_impurity = calc_impurity_cls(criterion, l_histogram, n_l_elements, n_classes);
-    r_impurity = calc_impurity_cls(criterion, r_histogram, n_r_elements, n_classes);
-    gain = w_impurity - (n_l_elements * l_impurity + n_r_elements * r_impurity) / n_elements;
-    /* Update optimal parameters. */
-    if (gain > params[3]) {
-      params[0] = l_impurity;
-      params[1] = r_impurity;
-      params[2] = 0.5 * (curr_el + next_el);
-      params[3] = gain;
-    }
-    if (next_pos == n_elements) break;
-    curr_pos = next_pos;
-    curr_el = f[o[curr_pos]];
-  }
-  xfree(l_histogram);
-  xfree(r_histogram);
-}
-/**
- * @!visibility private
- * Find the split point with maximum information gain.
- *
- * @overload find_split_params(criterion, impurity, order, features, labels, n_classes) -> Array<Float>
- *
- * @param criterion [String] The function used to evaluate a splitting point. Supported criteria are 'gini' and 'entropy'.
- * @param impurity [Float] The impurity of whole dataset.
- * @param order [Numo::Int32] (shape: [n_elements]) The element indices sorted according to feature values.
- * @param features [Numo::DFloat] (shape: [n_elements]) The feature values.
- * @param labels [Numo::Int32] (shape: [n_elements]) The labels.
- * @param n_classes [Integer] The number of classes.
- * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
- */
-static VALUE find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE labels,
-                                   VALUE n_classes) {
-  ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cInt32, 1}};
-  size_t out_shape[1] = {4};
-  ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
-  ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_cls, NO_LOOP, 3, 1, ain, aout};
-  split_opts_cls opts = {StringValuePtr(criterion), NUM2LONG(n_classes), NUM2DBL(impurity)};
-  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, labels);
-  VALUE results = rb_ary_new2(4);
-  double* params_ptr = (double*)na_get_pointer_for_read(params);
-  rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
-  rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
-  rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
-  rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
-  RB_GC_GUARD(params);
-  RB_GC_GUARD(criterion);
-  return results;
-}
-/**
- * @!visibility private
- */
-typedef struct {
-  char* criterion;
-  double impurity;
-} split_opts_reg;
-/**
- * @!visibility private
- */
-static void iter_find_split_params_reg(na_loop_t const* lp) {
-  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
-  const double* f = (double*)NDL_PTR(lp, 1);
-  const double* y = (double*)NDL_PTR(lp, 2);
-  const long n_elements = NDL_SHAPE(lp, 0)[0];
-  const long n_outputs = NDL_SHAPE(lp, 2)[1];
-  const char* criterion = ((split_opts_reg*)lp->opt_ptr)->criterion;
-  const double w_impurity = ((split_opts_reg*)lp->opt_ptr)->impurity;
-  double* params = (double*)NDL_PTR(lp, 3);
-  long i, j;
-  long curr_pos = 0;
-  long next_pos = 0;
-  long n_l_elements = 0;
-  long n_r_elements = n_elements;
-  double curr_el = f[o[0]];
-  double last_el = f[o[n_elements - 1]];
-  double next_el;
-  double l_impurity;
-  double r_impurity;
-  double gain;
-  double* l_sum_vec = alloc_dbl_array(n_outputs);
-  double* r_sum_vec = alloc_dbl_array(n_outputs);
-  double target_var;
-  VALUE l_target_vecs = rb_ary_new();
-  VALUE r_target_vecs = rb_ary_new();
-  VALUE target;
-  /* Initialize optimal parameters. */
-  params[0] = 0.0;        /* left impurity */
-  params[1] = w_impurity; /* right impurity */
-  params[2] = curr_el;    /* threshold */
-  params[3] = 0.0;        /* gain */
-  /* Initialize child node variables. */
-  for (i = 0; i < n_elements; i++) {
-    target = rb_ary_new2(n_outputs);
-    for (j = 0; j < n_outputs; j++) {
-      target_var = y[o[i] * n_outputs + j];
-      rb_ary_store(target, j, DBL2NUM(target_var));
-      r_sum_vec[j] += target_var;
-    }
-    rb_ary_push(r_target_vecs, target);
-  }
-  /* Find optimal parameters. */
-  while (curr_pos < n_elements && curr_el != last_el) {
-    next_el = f[o[next_pos]];
-    while (next_pos < n_elements && next_el == curr_el) {
-      target = rb_ary_shift(r_target_vecs);
-      n_r_elements--;
-      sub_sum_vec(r_sum_vec, target);
-      rb_ary_push(l_target_vecs, target);
-      n_l_elements++;
-      add_sum_vec(l_sum_vec, target);
-      next_pos++;
-      next_el = f[o[next_pos]];
-    }
-    /* Calculate gain of new split. */
-    l_impurity = calc_impurity_reg(criterion, l_target_vecs, l_sum_vec);
-    r_impurity = calc_impurity_reg(criterion, r_target_vecs, r_sum_vec);
-    gain = w_impurity - (n_l_elements * l_impurity + n_r_elements * r_impurity) / n_elements;
-    /* Update optimal parameters. */
-    if (gain > params[3]) {
-      params[0] = l_impurity;
-      params[1] = r_impurity;
-      params[2] = 0.5 * (curr_el + next_el);
-      params[3] = gain;
-    }
-    if (next_pos == n_elements) break;
-    curr_pos = next_pos;
-    curr_el = f[o[curr_pos]];
-  }
-  xfree(l_sum_vec);
-  xfree(r_sum_vec);
-}
-/**
- * @!visibility private
- * Find the split point with maximum information gain.
- *
- * @overload find_split_params(criterion, impurity, order, features, targets) -> Array<Float>
- *
- * @param criterion [String] The function used to evaluate a splitting point. Supported criteria are 'mae' and 'mse'.
- * @param impurity [Float] The impurity of whole dataset.
- * @param order [Numo::Int32] (shape: [n_samples]) The element indices sorted according to feature values in ascending order.
- * @param features [Numo::DFloat] (shape: [n_samples]) The feature values.
- * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
- * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
- */
-static VALUE find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets) {
-  ndfunc_arg_in_t ain[3] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2}};
-  size_t out_shape[1] = {4};
-  ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
-  ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout};
-  split_opts_reg opts = {StringValuePtr(criterion), NUM2DBL(impurity)};
-  VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
-  VALUE results = rb_ary_new2(4);
-  double* params_ptr = (double*)na_get_pointer_for_read(params);
-  rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
-  rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
-  rb_ary_store(results, 2, DBL2NUM(params_ptr[2]));
-  rb_ary_store(results, 3, DBL2NUM(params_ptr[3]));
-  RB_GC_GUARD(params);
-  RB_GC_GUARD(criterion);
-  return results;
-}
-/**
- * @!visibility private
- */
-static void iter_find_split_params_grad_reg(na_loop_t const* lp) {
-  const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
-  const double* f = (double*)NDL_PTR(lp, 1);
-  const double* g = (double*)NDL_PTR(lp, 2);
-  const double* h = (double*)NDL_PTR(lp, 3);
-  const double s_grad = ((double*)lp->opt_ptr)[0];
-  const double s_hess = ((double*)lp->opt_ptr)[1];
-  const double reg_lambda = ((double*)lp->opt_ptr)[2];
-  const long n_elements = NDL_SHAPE(lp, 0)[0];
-  double* params = (double*)NDL_PTR(lp, 4);
-  long curr_pos = 0;
-  long next_pos = 0;
-  double curr_el = f[o[0]];
-  double last_el = f[o[n_elements - 1]];
-  double next_el;
-  double l_grad = 0.0;
-  double l_hess = 0.0;
-  double r_grad;
-  double r_hess;
-  double threshold = curr_el;
-  double gain_max = 0.0;
-  double gain;
-  /* Find optimal parameters. */
-  while (curr_pos < n_elements && curr_el != last_el) {
-    next_el = f[o[next_pos]];
-    while (next_pos < n_elements && next_el == curr_el) {
-      l_grad += g[o[next_pos]];
-      l_hess += h[o[next_pos]];
-      next_pos++;
-      next_el = f[o[next_pos]];
-    }
-    /* Calculate gain of new split. */
-    r_grad = s_grad - l_grad;
-    r_hess = s_hess - l_hess;
-    gain = (l_grad * l_grad) / (l_hess + reg_lambda) + (r_grad * r_grad) / (r_hess + reg_lambda) -
-           (s_grad * s_grad) / (s_hess + reg_lambda);
-    /* Update optimal parameters. */
-    if (gain > gain_max) {
-      threshold = 0.5 * (curr_el + next_el);
-      gain_max = gain;
-    }
-    if (next_pos == n_elements) {
-      break;
-    }
-    curr_pos = next_pos;
-    curr_el = f[o[curr_pos]];
-  }
-  params[0] = threshold;
-  params[1] = gain_max;
-}
-/**
- * @!visibility private
- * Find the split point with maximum information gain.
- *
- * @overload find_split_params(order, features, gradients, hessians, sum_gradient, sum_hessian, reg_lambda) -> Array<Float>
- *   @param order [Numo::Int32] (shape: [n_elements]) The element indices sorted according to feature values.
- *   @param features [Numo::DFloat] (shape: [n_elements]) The feature values.
- *   @param gradients [Numo::DFloat] (shape: [n_elements]) The gradient values.
- *   @param hessians [Numo::DFloat] (shape: [n_elements]) The hessian values.
- *   @param sum_gradient [Float] The sum of gradient values.
- *   @param sum_hessian [Float] The sum of hessian values.
- *   @param reg_lambda [Float] The L2 regularization term on weight.
- * @return [Array<Float>] The array consists of optimal parameters including threshold and gain.
- */
-static VALUE find_split_params_grad_reg(VALUE self, VALUE order, VALUE features, VALUE gradients, VALUE hessians,
-                                        VALUE sum_gradient, VALUE sum_hessian, VALUE reg_lambda) {
-  ndfunc_arg_in_t ain[4] = {{numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 1}};
-  size_t out_shape[1] = {2};
-  ndfunc_arg_out_t aout[1] = {{numo_cDFloat, 1, out_shape}};
-  ndfunc_t ndf = {(na_iter_func_t)iter_find_split_params_grad_reg, NO_LOOP, 4, 1, ain, aout};
-  double opts[3] = {NUM2DBL(sum_gradient), NUM2DBL(sum_hessian), NUM2DBL(reg_lambda)};
-  VALUE params = na_ndloop3(&ndf, opts, 4, order, features, gradients, hessians);
-  VALUE results = rb_ary_new2(2);
-  double* params_ptr = (double*)na_get_pointer_for_read(params);
-  rb_ary_store(results, 0, DBL2NUM(params_ptr[0]));
-  rb_ary_store(results, 1, DBL2NUM(params_ptr[1]));
-  RB_GC_GUARD(params);
-  return results;
-}
-/**
- * @!visibility private
- * Calculate impurity based on criterion.
- *
- * @overload node_impurity(criterion, y_nary, n_elements_, n_classes_) -> Float
- *
- * @param criterion [String] The function to calculate impurity. Supported criteria are 'gini' and 'entropy'.
- * @param y_nary [Numo::Int32] (shape: [n_samples]) The labels.
- * @param n_elements_ [Integer] The number of elements.
- * @param n_classes_ [Integer] The number of classes.
- * @return [Float] impurity
- */
-static VALUE node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_, VALUE n_classes_) {
-  long i;
-  const long n_classes = NUM2LONG(n_classes_);
-  const long n_elements = NUM2LONG(n_elements_);
-  const int32_t* y = (int32_t*)na_get_pointer_for_read(y_nary);
-  double* histogram = alloc_dbl_array(n_classes);
-  VALUE ret;
-  for (i = 0; i < n_elements; i++) {
-    histogram[y[i]] += 1;
-  }
-  ret = DBL2NUM(calc_impurity_cls(StringValuePtr(criterion), histogram, n_elements, n_classes));
-  xfree(histogram);
-  RB_GC_GUARD(y_nary);
-  RB_GC_GUARD(criterion);
-  return ret;
-}
-/**
- * @!visibility private
- * Calculate impurity based on criterion.
- *
- * @overload node_impurity(criterion, y) -> Float
- *
- * @param criterion [String] The function to calculate impurity. Supported criteria are 'mae' and 'mse'.
- * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
- * @return [Float] impurity
- */
-static VALUE node_impurity_reg(VALUE self, VALUE criterion, VALUE y) {
-  long i;
-  const long n_elements = RARRAY_LEN(y);
-  const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
-  double* sum_vec = alloc_dbl_array(n_outputs);
-  VALUE target_vecs = rb_ary_new();
-  VALUE target;
-  VALUE ret;
-  for (i = 0; i < n_elements; i++) {
-    target = rb_ary_entry(y, i);
-    add_sum_vec(sum_vec, target);
-    rb_ary_push(target_vecs, target);
-  }
-  ret = DBL2NUM(calc_impurity_reg(StringValuePtr(criterion), target_vecs, sum_vec));
-  xfree(sum_vec);
-  RB_GC_GUARD(criterion);
-  return ret;
-}
-void Init_rumaleext(void) {
-  VALUE mRumale = rb_define_module("Rumale");
-  VALUE mTree = rb_define_module_under(mRumale, "Tree");
-  /**
-   * Document-module: Rumale::Tree::ExtDecisionTreeClassifier
-   * @!visibility private
-   * The mixin module consisting of extension method for DecisionTreeClassifier class.
-   * This module is used internally.
-   */
-  VALUE mExtDTreeCls = rb_define_module_under(mTree, "ExtDecisionTreeClassifier");
-  /**
-   * Document-module: Rumale::Tree::ExtDecisionTreeRegressor
-   * @!visibility private
-   * The mixin module consisting of extension method for DecisionTreeRegressor class.
-   * This module is used internally.
-   */
-  VALUE mExtDTreeReg = rb_define_module_under(mTree, "ExtDecisionTreeRegressor");
-  /**
-   * Document-module: Rumale::Tree::ExtGradientTreeRegressor
-   * @!visibility private
-   * The mixin module consisting of extension method for GradientTreeRegressor class.
-   * This module is used internally.
-   */
-  VALUE mExtGTreeReg = rb_define_module_under(mTree, "ExtGradientTreeRegressor");
-  rb_define_private_method(mExtDTreeCls, "find_split_params", find_split_params_cls, 6);
-  rb_define_private_method(mExtDTreeReg, "find_split_params", find_split_params_reg, 5);
-  rb_define_private_method(mExtGTreeReg, "find_split_params", find_split_params_grad_reg, 7);
-  rb_define_private_method(mExtDTreeCls, "node_impurity", node_impurity_cls, 4);
-  rb_define_private_method(mExtDTreeReg, "node_impurity", node_impurity_reg, 2);
-}

data/ext/rumale/rumaleext.h DELETED Viewed

@@ -1,12 +0,0 @@
-#ifndef RUMALEEXT_H
-#define RUMALEEXT_H 1
-#include <math.h>
-#include <string.h>
-#include <ruby.h>
-#include <numo/narray.h>
-#include <numo/template.h>
-#endif /* RUMALEEXT_H */