rumale 0.12.8 → 0.12.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cbe91a2e4b8e16a5cf37ae8141ad73a760dbeac2
4
- data.tar.gz: eac618abc865d6a9a8458afeec91b8f86fcc7b98
3
+ metadata.gz: c55e2ab90432838616c16fdf35d4eac150cc02b8
4
+ data.tar.gz: c605feef7c8d3d7dce4e8330419ba88288d17f74
5
5
  SHA512:
6
- metadata.gz: d980b9f3fb0d13bd71a672b4cdc39cef616ec354df5581ec7fb3e793059c0847be8d008a50297328b9a78b7a2b0c146b65aa667b72b9becf56e0f9b1fb279955
7
- data.tar.gz: 3a24e75f869e64a922202b5e81122f8c19a813fade58914788cd5c239c0c53a27ce5e9bbb7ed1c8c62a11d1f0fa54eb43244e52502f45c4907866b92fd01b405
6
+ metadata.gz: f3ec59d17a66d74d978860537271c0d7c8881924cce6589345d43079897879ac603b6c01c7b0884419457e4bf6a99187345d203e8638be6d96aabe1ce513560f
7
+ data.tar.gz: 86f0cbf4c92b72b9caff2e5a9ed39b47013e4c11bdacf6661148a01f3c69a72253bc8690fa5c28207888461b8bc1070f39b87bc23df11866b9018d61cd37b2fd
@@ -1,3 +1,7 @@
1
+ # 0.12.9
2
+ - Add class for K-Medoids clustering.
3
+ - Fix extension codes of decision tree regressor for using Numo::NArray.
4
+
1
5
  # 0.12.8
2
6
  - Fix bug that fails to build and install on Windows again. Fix extconf to add Numo::NArray libraries to $lib.
3
7
 
data/README.md CHANGED
@@ -6,14 +6,14 @@
6
6
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
9
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.8)
9
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.9)
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
13
13
  Rumale supports Linear / Kernel Support Vector Machine,
14
14
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
15
15
  Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
16
- K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
16
+ K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
17
17
  Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
18
18
 
19
19
  This project was formerly known as "SVMKit".
@@ -131,9 +131,9 @@ calc_impurity_cls(const char* criterion, VALUE histogram, const long n_elements)
131
131
  }
132
132
 
133
133
  double
134
- calc_impurity_reg(VALUE criterion, VALUE target_vecs, VALUE sum_vec)
134
+ calc_impurity_reg(const char* criterion, VALUE target_vecs, VALUE sum_vec)
135
135
  {
136
- if (strcmp(StringValuePtr(criterion), "mae") == 0) {
136
+ if (strcmp(criterion, "mae") == 0) {
137
137
  return calc_mae(target_vecs, sum_vec);
138
138
  }
139
139
  return calc_mse(target_vecs, sum_vec);
@@ -286,83 +286,115 @@ find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
286
286
 
287
287
  /**
288
288
  * @!visibility private
289
- * Find for split point with maximum information gain.
290
- *
291
- * @overload find_split_params(criterion, impurity, sorted_features, sorted_targets) -> Array<Float>
292
- *
293
- * @param criterion [String] The function to evaluate spliting point. Supported criteria are 'mae' and 'mse'.
294
- * @param impurity [Float] The impurity of whole dataset.
295
- * @param sorted_features [Numo::DFloat] (shape: [n_samples]) The feature values sorted in ascending order.
296
- * @param sorted_targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values sorted according to feature values.
297
- * @return [Float] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
298
289
  */
299
- static VALUE
300
- find_split_params_reg(VALUE self, VALUE criterion, VALUE whole_impurity, VALUE sorted_f, VALUE sorted_y)
290
+ typedef struct {
291
+ char* criterion;
292
+ double impurity;
293
+ } split_opts_reg;
294
+ /**
295
+ * @!visibility private
296
+ */
297
+ static void
298
+ iter_find_split_params_reg(na_loop_t const* lp)
301
299
  {
302
- const long n_elements = RARRAY_LEN(sorted_f);
303
- const long n_dimensions = RARRAY_LEN(rb_ary_entry(sorted_y, 0));
304
- const double w_impurity = NUM2DBL(whole_impurity);
305
- long iter = 0;
300
+ const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
301
+ const double* f = (double*)NDL_PTR(lp, 1);
302
+ const double* y = (double*)NDL_PTR(lp, 2);
303
+ const long n_elements = NDL_SHAPE(lp, 0)[0];
304
+ const long n_outputs = NDL_SHAPE(lp, 2)[1];
305
+ const char* criterion = ((split_opts_reg*)lp->opt_ptr)->criterion;
306
+ const double w_impurity = ((split_opts_reg*)lp->opt_ptr)->impurity;
307
+ double* params = (double*)NDL_PTR(lp, 3);
308
+ long i, j;
306
309
  long curr_pos = 0;
307
310
  long next_pos = 0;
308
311
  long n_l_elements = 0;
309
312
  long n_r_elements = n_elements;
310
- double last_el = NUM2DBL(rb_ary_entry(sorted_f, n_elements - 1));
311
- double curr_el = NUM2DBL(rb_ary_entry(sorted_f, 0));
313
+ double curr_el = f[o[0]];
314
+ double last_el = f[o[n_elements - 1]];
312
315
  double next_el;
313
316
  double l_impurity;
314
317
  double r_impurity;
315
318
  double gain;
316
- VALUE l_sum_vec = create_zero_vector(n_dimensions);
317
- VALUE r_sum_vec = create_zero_vector(n_dimensions);
319
+ VALUE l_sum_vec = create_zero_vector(n_outputs);
320
+ VALUE r_sum_vec = create_zero_vector(n_outputs);
318
321
  VALUE l_target_vecs = rb_ary_new();
319
322
  VALUE r_target_vecs = rb_ary_new();
320
323
  VALUE target;
321
- VALUE opt_params = rb_ary_new2(4);
322
324
 
323
325
  /* Initialize optimal parameters. */
324
- rb_ary_store(opt_params, 0, DBL2NUM(0)); /* left impurity */
325
- rb_ary_store(opt_params, 1, DBL2NUM(w_impurity)); /* right impurity */
326
- rb_ary_store(opt_params, 2, rb_ary_entry(sorted_f, 0)); /* threshold */
327
- rb_ary_store(opt_params, 3, DBL2NUM(0)); /* gain */
326
+ params[0] = 0.0; /* left impurity */
327
+ params[1] = w_impurity; /* right impurity */
328
+ params[2] = curr_el; /* threshold */
329
+ params[3] = 0.0; /* gain */
328
330
 
329
331
  /* Initialize child node variables. */
330
- for (iter = 0; iter < n_elements; iter++) {
331
- target = rb_ary_entry(sorted_y, iter);
332
+ for (i = 0; i < n_elements; i++) {
333
+ target = rb_ary_new2(n_outputs);
334
+ for (j = 0; j < n_outputs; j++) {
335
+ rb_ary_store(target, j, DBL2NUM(y[o[i] * n_outputs + j]));
336
+ }
332
337
  add_sum_vec(r_sum_vec, target);
333
338
  rb_ary_push(r_target_vecs, target);
334
339
  }
335
340
 
336
341
  /* Find optimal parameters. */
337
342
  while (curr_pos < n_elements && curr_el != last_el) {
338
- next_el = NUM2DBL(rb_ary_entry(sorted_f, next_pos));
343
+ next_el = f[o[next_pos]];
339
344
  while (next_pos < n_elements && next_el == curr_el) {
340
- target = rb_ary_entry(sorted_y, next_pos);
341
- add_sum_vec(l_sum_vec, target);
345
+ target = rb_ary_shift(r_target_vecs);
346
+ n_r_elements--;
347
+ sub_sum_vec(r_sum_vec, target);
342
348
  rb_ary_push(l_target_vecs, target);
343
349
  n_l_elements++;
344
- sub_sum_vec(r_sum_vec, target);
345
- rb_ary_shift(r_target_vecs);
346
- n_r_elements--;
347
- next_el = NUM2DBL(rb_ary_entry(sorted_f, ++next_pos));
350
+ add_sum_vec(l_sum_vec, target);
351
+ next_pos++;
352
+ next_el = f[o[next_pos]];
348
353
  }
349
354
  /* Calculate gain of new split. */
350
355
  l_impurity = calc_impurity_reg(criterion, l_target_vecs, l_sum_vec);
351
356
  r_impurity = calc_impurity_reg(criterion, r_target_vecs, r_sum_vec);
352
357
  gain = w_impurity - (n_l_elements * l_impurity + n_r_elements * r_impurity) / n_elements;
353
358
  /* Update optimal parameters. */
354
- if (gain > NUM2DBL(rb_ary_entry(opt_params, 3))) {
355
- rb_ary_store(opt_params, 0, DBL2NUM(l_impurity));
356
- rb_ary_store(opt_params, 1, DBL2NUM(r_impurity));
357
- rb_ary_store(opt_params, 2, DBL2NUM(0.5 * (curr_el + next_el)));
358
- rb_ary_store(opt_params, 3, DBL2NUM(gain));
359
+ if (gain > params[3]) {
360
+ params[0] = l_impurity;
361
+ params[1] = r_impurity;
362
+ params[2] = 0.5 * (curr_el + next_el);
363
+ params[3] = gain;
359
364
  }
360
365
  if (next_pos == n_elements) break;
361
366
  curr_pos = next_pos;
362
- curr_el = NUM2DBL(rb_ary_entry(sorted_f, curr_pos));
367
+ curr_el = f[o[curr_pos]];
363
368
  }
364
-
365
- return opt_params;
369
+ }
370
+ /**
371
+ * @!visibility private
372
+ * Find the split point with maximum information gain.
373
+ *
374
+ * @overload find_split_params(criterion, impurity, order, features, targets) -> Array<Float>
375
+ *
376
+ * @param criterion [String] The function to evaluate splitting point. Supported criteria are 'mae' and 'mse'.
377
+ * @param impurity [Float] The impurity of whole dataset.
378
+ * @param order [Numo::Int32] (shape: [n_samples]) The element indices sorted according to feature values in ascending order.
379
+ * @param features [Numo::DFloat] (shape: [n_samples]) The feature values.
380
+ * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
381
+ * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
382
+ */
383
+ static VALUE
384
+ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets)
385
+ {
386
+ ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2} };
387
+ size_t out_shape[1] = { 4 };
388
+ ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
389
+ ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
390
+ split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
391
+ VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
392
+ VALUE results = rb_ary_new2(4);
393
+ rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
394
+ rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
395
+ rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
396
+ rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
397
+ return results;
366
398
  }
367
399
 
368
400
  /**
@@ -487,7 +519,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
487
519
  * @overload node_impurity(criterion, y) -> Float
488
520
  *
489
521
  * @param criterion [String] The function to calculate impurity. Supported criteria are 'mae' and 'mse'.
490
- * @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The taget values.
522
+ * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
491
523
  * @return [Float] impurity
492
524
  */
493
525
  static VALUE
@@ -495,8 +527,8 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
495
527
  {
496
528
  long i;
497
529
  const long n_elements = RARRAY_LEN(y);
498
- const long n_dimensions = RARRAY_LEN(rb_ary_entry(y, 0));
499
- VALUE sum_vec = create_zero_vector(n_dimensions);
530
+ const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
531
+ VALUE sum_vec = create_zero_vector(n_outputs);
500
532
  VALUE target_vecs = rb_ary_new();
501
533
  VALUE target;
502
534
 
@@ -506,7 +538,7 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
506
538
  rb_ary_push(target_vecs, target);
507
539
  }
508
540
 
509
- return DBL2NUM(calc_impurity_reg(criterion, target_vecs, sum_vec));
541
+ return DBL2NUM(calc_impurity_reg(StringValuePtr(criterion), target_vecs, sum_vec));
510
542
  }
511
543
 
512
544
  void Init_rumale(void)
@@ -536,7 +568,7 @@ void Init_rumale(void)
536
568
  VALUE mExtGTreeReg = rb_define_module_under(mTree, "ExtGradientTreeRegressor");
537
569
 
538
570
  rb_define_private_method(mExtDTreeCls, "find_split_params", find_split_params_cls, 6);
539
- rb_define_private_method(mExtDTreeReg, "find_split_params", find_split_params_reg, 4);
571
+ rb_define_private_method(mExtDTreeReg, "find_split_params", find_split_params_reg, 5);
540
572
  rb_define_private_method(mExtGTreeReg, "find_split_params", find_split_params_grad_reg, 7);
541
573
  rb_define_private_method(mExtDTreeCls, "node_impurity", node_impurity_cls, 4);
542
574
  rb_define_private_method(mExtDTreeReg, "node_impurity", node_impurity_reg, 2);
@@ -57,6 +57,7 @@ require 'rumale/ensemble/random_forest_regressor'
57
57
  require 'rumale/ensemble/extra_trees_classifier'
58
58
  require 'rumale/ensemble/extra_trees_regressor'
59
59
  require 'rumale/clustering/k_means'
60
+ require 'rumale/clustering/k_medoids'
60
61
  require 'rumale/clustering/gaussian_mixture'
61
62
  require 'rumale/clustering/dbscan'
62
63
  require 'rumale/clustering/power_iteration'
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module Clustering
9
+ # KMedoids is a class that implements K-Medoids cluster analysis.
10
+ #
11
+ # @example
12
+ # analyzer = Rumale::Clustering::KMedoids.new(n_clusters: 10, max_iter: 50)
13
+ # cluster_labels = analyzer.fit_predict(samples)
14
+ #
15
+ # *Reference*
16
+ # - D. Arthur and S. Vassilvitskii, "k-means++: the advantages of careful seeding," Proc. SODA'07, pp. 1027--1035, 2007.
17
+ class KMedoids
18
+ include Base::BaseEstimator
19
+ include Base::ClusterAnalyzer
20
+
21
+ # Return the indices of medoids.
22
+ # @return [Numo::Int32] (shape: [n_clusters])
23
+ attr_reader :medoid_ids
24
+
25
+ # Return the random generator.
26
+ # @return [Random]
27
+ attr_reader :rng
28
+
29
+ # Create a new cluster analyzer with K-Medoids method.
30
+ #
31
+ # @param n_clusters [Integer] The number of clusters.
32
+ # @param metric [String] The metric to calculate the distances in original space.
33
+ # If metric is 'euclidean', Euclidean distance is calculated for distance in original space.
34
+ # If metric is 'precomputed', the fit and fit_transform methods expect to be given a distance matrix.
35
+ # @param init [String] The initialization method for centroids ('random' or 'k-means++').
36
+ # @param max_iter [Integer] The maximum number of iterations.
37
+ # @param tol [Float] The tolerance of termination criterion.
38
+ # @param random_seed [Integer] The seed value used to initialize the random generator.
39
+ def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
40
+ check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
41
+ check_params_float(tol: tol)
42
+ check_params_string(metric: metric, init: init)
43
+ check_params_type_or_nil(Integer, random_seed: random_seed)
44
+ check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
45
+ @params = {}
46
+ @params[:n_clusters] = n_clusters
47
+ @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
48
+ @params[:init] = init == 'random' ? 'random' : 'k-means++'
49
+ @params[:max_iter] = max_iter
50
+ @params[:tol] = tol
51
+ @params[:random_seed] = random_seed
52
+ @params[:random_seed] ||= srand
53
+ @medoid_ids = nil
54
+ @cluster_centers = nil
55
+ @rng = Random.new(@params[:random_seed])
56
+ end
57
+
58
+ # Analyze clusters with given training data.
59
+ #
60
+ # @overload fit(x) -> KMedoids
61
+ #
62
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
63
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
64
+ # @return [KMedoids] The learned cluster analyzer itself.
65
+ def fit(x, _not_used = nil)
66
+ check_sample_array(x)
67
+ raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
68
+ # initialize some variables.
69
+ distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
70
+ init_cluster_centers(distance_mat)
71
+ error = distance_mat[true, @medoid_ids].mean
72
+ @params[:max_iter].times do |_t|
73
+ cluster_labels = assign_cluster(distance_mat[true, @medoid_ids])
74
+ @params[:n_clusters].times do |n|
75
+ assigned_ids = cluster_labels.eq(n).where
76
+ @medoid_ids[n] = assigned_ids[distance_mat[assigned_ids, assigned_ids].sum(axis: 1).min_index]
77
+ end
78
+ new_error = distance_mat[true, @medoid_ids].mean
79
+ break if (error - new_error).abs <= @params[:tol]
80
+ error = new_error
81
+ end
82
+ @cluster_centers = x[@medoid_ids, true].dup if @params[:metric] == 'euclidean'
83
+ self
84
+ end
85
+
86
+ # Predict cluster labels for samples.
87
+ #
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
89
+ # If the metric is 'precomputed', x must be distances between samples and medoids (shape: [n_samples, n_clusters]).
90
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
91
+ def predict(x)
92
+ check_sample_array(x)
93
+ distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
94
+ if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
95
+ raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
96
+ end
97
+ assign_cluster(distance_mat)
98
+ end
99
+
100
+ # Analyze clusters and assign samples to clusters.
101
+ #
102
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
103
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
104
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
105
+ def fit_predict(x)
106
+ check_sample_array(x)
107
+ fit(x)
108
+ if @params[:metric] == 'precomputed'
109
+ predict(x[true, @medoid_ids])
110
+ else
111
+ predict(x)
112
+ end
113
+ end
114
+
115
+ # Dump marshal data.
116
+ # @return [Hash] The marshal data.
117
+ def marshal_dump
118
+ { params: @params,
119
+ medoid_ids: @medoid_ids,
120
+ cluster_centers: @cluster_centers,
121
+ rng: @rng }
122
+ end
123
+
124
+ # Load marshal data.
125
+ # @return [nil]
126
+ def marshal_load(obj)
127
+ @params = obj[:params]
128
+ @medoid_ids = obj[:medoid_ids]
129
+ @cluster_centers = obj[:cluster_centers]
130
+ @rng = obj[:rng]
131
+ nil
132
+ end
133
+
134
+ private
135
+
136
+ def assign_cluster(distances_to_medoids)
137
+ distances_to_medoids.min_index(axis: 1) - Numo::Int32[*0.step(distances_to_medoids.size - 1, @params[:n_clusters])]
138
+ end
139
+
140
+ def init_cluster_centers(distance_mat)
141
+ # random initialize
142
+ n_samples = distance_mat.shape[0]
143
+ sub_rng = @rng.dup
144
+ @medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
145
+ return unless @params[:init] == 'k-means++'
146
+ # k-means++ initialize
147
+ (1...@params[:n_clusters]).each do |n|
148
+ distances = distance_mat[true, @medoid_ids[0...n]]
149
+ min_distances = distances.flatten[distances.min_index(axis: 1)]
150
+ probs = min_distances**2 / (min_distances**2).sum
151
+ cum_probs = probs.cumsum
152
+ @medoid_ids[n] = cum_probs.gt(sub_rng.rand).where.to_a.first
153
+ end
154
+ end
155
+ end
156
+ end
157
+ end
@@ -126,11 +126,8 @@ module Rumale
126
126
  node
127
127
  end
128
128
 
129
- def best_split(features, y, whole_impurity)
130
- order = features.sort_index
131
- sorted_f = features[order].to_a
132
- sorted_y = y[order, true].to_a
133
- find_split_params(@params[:criterion], whole_impurity, sorted_f, sorted_y)
129
+ def best_split(f, y, impurity)
130
+ find_split_params(@params[:criterion], impurity, f.sort_index, f, y)
134
131
  end
135
132
 
136
133
  def impurity(y)
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.12.8'
6
+ VERSION = '0.12.9'
7
7
  end
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  Rumale currently supports Linear / Kernel Support Vector Machine,
20
20
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
21
21
  Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
22
- K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
22
+ K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
23
23
  Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
24
24
  MSG
25
25
  spec.homepage = 'https://github.com/yoshoku/rumale'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.8
4
+ version: 0.12.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-15 00:00:00.000000000 Z
11
+ date: 2019-07-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -114,7 +114,7 @@ description: |
114
114
  Rumale currently supports Linear / Kernel Support Vector Machine,
115
115
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
116
116
  Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
117
- K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
117
+ K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
118
118
  Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
119
119
  email:
120
120
  - yoshoku@outlook.com
@@ -150,6 +150,7 @@ files:
150
150
  - lib/rumale/clustering/dbscan.rb
151
151
  - lib/rumale/clustering/gaussian_mixture.rb
152
152
  - lib/rumale/clustering/k_means.rb
153
+ - lib/rumale/clustering/k_medoids.rb
153
154
  - lib/rumale/clustering/power_iteration.rb
154
155
  - lib/rumale/dataset.rb
155
156
  - lib/rumale/decomposition/nmf.rb