rumale 0.12.8 → 0.12.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cbe91a2e4b8e16a5cf37ae8141ad73a760dbeac2
4
- data.tar.gz: eac618abc865d6a9a8458afeec91b8f86fcc7b98
3
+ metadata.gz: c55e2ab90432838616c16fdf35d4eac150cc02b8
4
+ data.tar.gz: c605feef7c8d3d7dce4e8330419ba88288d17f74
5
5
  SHA512:
6
- metadata.gz: d980b9f3fb0d13bd71a672b4cdc39cef616ec354df5581ec7fb3e793059c0847be8d008a50297328b9a78b7a2b0c146b65aa667b72b9becf56e0f9b1fb279955
7
- data.tar.gz: 3a24e75f869e64a922202b5e81122f8c19a813fade58914788cd5c239c0c53a27ce5e9bbb7ed1c8c62a11d1f0fa54eb43244e52502f45c4907866b92fd01b405
6
+ metadata.gz: f3ec59d17a66d74d978860537271c0d7c8881924cce6589345d43079897879ac603b6c01c7b0884419457e4bf6a99187345d203e8638be6d96aabe1ce513560f
7
+ data.tar.gz: 86f0cbf4c92b72b9caff2e5a9ed39b47013e4c11bdacf6661148a01f3c69a72253bc8690fa5c28207888461b8bc1070f39b87bc23df11866b9018d61cd37b2fd
@@ -1,3 +1,7 @@
1
+ # 0.12.9
2
+ - Add class for K-Medoids clustering.
3
+ - Fix extension codes of decision tree regressor for using Numo::NArray.
4
+
1
5
  # 0.12.8
2
6
  - Fix bug that fails to build and install on Windows again. Fix extconf to add Numo::NArray libraries to $lib.
3
7
 
data/README.md CHANGED
@@ -6,14 +6,14 @@
6
6
  [![Coverage Status](https://coveralls.io/repos/github/yoshoku/rumale/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/rumale?branch=master)
7
7
  [![Gem Version](https://badge.fury.io/rb/rumale.svg)](https://badge.fury.io/rb/rumale)
8
8
  [![BSD 2-Clause License](https://img.shields.io/badge/License-BSD%202--Clause-orange.svg)](https://github.com/yoshoku/rumale/blob/master/LICENSE.txt)
9
- [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.8)
9
+ [![Documentation](http://img.shields.io/badge/docs-rdoc.info-blue.svg)](https://www.rubydoc.info/gems/rumale/0.12.9)
10
10
 
11
11
  Rumale (**Ru**by **ma**chine **le**arning) is a machine learning library in Ruby.
12
12
  Rumale provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
13
13
  Rumale supports Linear / Kernel Support Vector Machine,
14
14
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
15
15
  Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor classifier,
16
- K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
16
+ K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
17
17
  Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
18
18
 
19
19
  This project was formerly known as "SVMKit".
@@ -131,9 +131,9 @@ calc_impurity_cls(const char* criterion, VALUE histogram, const long n_elements)
131
131
  }
132
132
 
133
133
  double
134
- calc_impurity_reg(VALUE criterion, VALUE target_vecs, VALUE sum_vec)
134
+ calc_impurity_reg(const char* criterion, VALUE target_vecs, VALUE sum_vec)
135
135
  {
136
- if (strcmp(StringValuePtr(criterion), "mae") == 0) {
136
+ if (strcmp(criterion, "mae") == 0) {
137
137
  return calc_mae(target_vecs, sum_vec);
138
138
  }
139
139
  return calc_mse(target_vecs, sum_vec);
@@ -286,83 +286,115 @@ find_split_params_cls(VALUE self, VALUE criterion, VALUE impurity, VALUE order,
286
286
 
287
287
  /**
288
288
  * @!visibility private
289
- * Find for split point with maximum information gain.
290
- *
291
- * @overload find_split_params(criterion, impurity, sorted_features, sorted_targets) -> Array<Float>
292
- *
293
- * @param criterion [String] The function to evaluate spliting point. Supported criteria are 'mae' and 'mse'.
294
- * @param impurity [Float] The impurity of whole dataset.
295
- * @param sorted_features [Numo::DFloat] (shape: [n_samples]) The feature values sorted in ascending order.
296
- * @param sorted_targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values sorted according to feature values.
297
- * @return [Float] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
298
289
  */
299
- static VALUE
300
- find_split_params_reg(VALUE self, VALUE criterion, VALUE whole_impurity, VALUE sorted_f, VALUE sorted_y)
290
+ typedef struct {
291
+ char* criterion;
292
+ double impurity;
293
+ } split_opts_reg;
294
+ /**
295
+ * @!visibility private
296
+ */
297
+ static void
298
+ iter_find_split_params_reg(na_loop_t const* lp)
301
299
  {
302
- const long n_elements = RARRAY_LEN(sorted_f);
303
- const long n_dimensions = RARRAY_LEN(rb_ary_entry(sorted_y, 0));
304
- const double w_impurity = NUM2DBL(whole_impurity);
305
- long iter = 0;
300
+ const int32_t* o = (int32_t*)NDL_PTR(lp, 0);
301
+ const double* f = (double*)NDL_PTR(lp, 1);
302
+ const double* y = (double*)NDL_PTR(lp, 2);
303
+ const long n_elements = NDL_SHAPE(lp, 0)[0];
304
+ const long n_outputs = NDL_SHAPE(lp, 2)[1];
305
+ const char* criterion = ((split_opts_reg*)lp->opt_ptr)->criterion;
306
+ const double w_impurity = ((split_opts_reg*)lp->opt_ptr)->impurity;
307
+ double* params = (double*)NDL_PTR(lp, 3);
308
+ long i, j;
306
309
  long curr_pos = 0;
307
310
  long next_pos = 0;
308
311
  long n_l_elements = 0;
309
312
  long n_r_elements = n_elements;
310
- double last_el = NUM2DBL(rb_ary_entry(sorted_f, n_elements - 1));
311
- double curr_el = NUM2DBL(rb_ary_entry(sorted_f, 0));
313
+ double curr_el = f[o[0]];
314
+ double last_el = f[o[n_elements - 1]];
312
315
  double next_el;
313
316
  double l_impurity;
314
317
  double r_impurity;
315
318
  double gain;
316
- VALUE l_sum_vec = create_zero_vector(n_dimensions);
317
- VALUE r_sum_vec = create_zero_vector(n_dimensions);
319
+ VALUE l_sum_vec = create_zero_vector(n_outputs);
320
+ VALUE r_sum_vec = create_zero_vector(n_outputs);
318
321
  VALUE l_target_vecs = rb_ary_new();
319
322
  VALUE r_target_vecs = rb_ary_new();
320
323
  VALUE target;
321
- VALUE opt_params = rb_ary_new2(4);
322
324
 
323
325
  /* Initialize optimal parameters. */
324
- rb_ary_store(opt_params, 0, DBL2NUM(0)); /* left impurity */
325
- rb_ary_store(opt_params, 1, DBL2NUM(w_impurity)); /* right impurity */
326
- rb_ary_store(opt_params, 2, rb_ary_entry(sorted_f, 0)); /* threshold */
327
- rb_ary_store(opt_params, 3, DBL2NUM(0)); /* gain */
326
+ params[0] = 0.0; /* left impurity */
327
+ params[1] = w_impurity; /* right impurity */
328
+ params[2] = curr_el; /* threshold */
329
+ params[3] = 0.0; /* gain */
328
330
 
329
331
  /* Initialize child node variables. */
330
- for (iter = 0; iter < n_elements; iter++) {
331
- target = rb_ary_entry(sorted_y, iter);
332
+ for (i = 0; i < n_elements; i++) {
333
+ target = rb_ary_new2(n_outputs);
334
+ for (j = 0; j < n_outputs; j++) {
335
+ rb_ary_store(target, j, DBL2NUM(y[o[i] * n_outputs + j]));
336
+ }
332
337
  add_sum_vec(r_sum_vec, target);
333
338
  rb_ary_push(r_target_vecs, target);
334
339
  }
335
340
 
336
341
  /* Find optimal parameters. */
337
342
  while (curr_pos < n_elements && curr_el != last_el) {
338
- next_el = NUM2DBL(rb_ary_entry(sorted_f, next_pos));
343
+ next_el = f[o[next_pos]];
339
344
  while (next_pos < n_elements && next_el == curr_el) {
340
- target = rb_ary_entry(sorted_y, next_pos);
341
- add_sum_vec(l_sum_vec, target);
345
+ target = rb_ary_shift(r_target_vecs);
346
+ n_r_elements--;
347
+ sub_sum_vec(r_sum_vec, target);
342
348
  rb_ary_push(l_target_vecs, target);
343
349
  n_l_elements++;
344
- sub_sum_vec(r_sum_vec, target);
345
- rb_ary_shift(r_target_vecs);
346
- n_r_elements--;
347
- next_el = NUM2DBL(rb_ary_entry(sorted_f, ++next_pos));
350
+ add_sum_vec(l_sum_vec, target);
351
+ next_pos++;
352
+ next_el = f[o[next_pos]];
348
353
  }
349
354
  /* Calculate gain of new split. */
350
355
  l_impurity = calc_impurity_reg(criterion, l_target_vecs, l_sum_vec);
351
356
  r_impurity = calc_impurity_reg(criterion, r_target_vecs, r_sum_vec);
352
357
  gain = w_impurity - (n_l_elements * l_impurity + n_r_elements * r_impurity) / n_elements;
353
358
  /* Update optimal parameters. */
354
- if (gain > NUM2DBL(rb_ary_entry(opt_params, 3))) {
355
- rb_ary_store(opt_params, 0, DBL2NUM(l_impurity));
356
- rb_ary_store(opt_params, 1, DBL2NUM(r_impurity));
357
- rb_ary_store(opt_params, 2, DBL2NUM(0.5 * (curr_el + next_el)));
358
- rb_ary_store(opt_params, 3, DBL2NUM(gain));
359
+ if (gain > params[3]) {
360
+ params[0] = l_impurity;
361
+ params[1] = r_impurity;
362
+ params[2] = 0.5 * (curr_el + next_el);
363
+ params[3] = gain;
359
364
  }
360
365
  if (next_pos == n_elements) break;
361
366
  curr_pos = next_pos;
362
- curr_el = NUM2DBL(rb_ary_entry(sorted_f, curr_pos));
367
+ curr_el = f[o[curr_pos]];
363
368
  }
364
-
365
- return opt_params;
369
+ }
370
+ /**
371
+ * @!visibility private
372
+ * Find the split point with maximum information gain.
373
+ *
374
+ * @overload find_split_params(criterion, impurity, order, features, targets) -> Array<Float>
375
+ *
376
+ * @param criterion [String] The function to evaluate splitting point. Supported criteria are 'mae' and 'mse'.
377
+ * @param impurity [Float] The impurity of whole dataset.
378
+ * @param order [Numo::Int32] (shape: [n_samples]) The element indices sorted according to feature values in ascending order.
379
+ * @param features [Numo::DFloat] (shape: [n_samples]) The feature values.
380
+ * @param targets [Numo::DFloat] (shape: [n_samples, n_outputs]) The target values.
381
+ * @return [Array<Float>] The array consists of optimal parameters including impurities of child nodes, threshold, and gain.
382
+ */
383
+ static VALUE
384
+ find_split_params_reg(VALUE self, VALUE criterion, VALUE impurity, VALUE order, VALUE features, VALUE targets)
385
+ {
386
+ ndfunc_arg_in_t ain[3] = { {numo_cInt32, 1}, {numo_cDFloat, 1}, {numo_cDFloat, 2} };
387
+ size_t out_shape[1] = { 4 };
388
+ ndfunc_arg_out_t aout[1] = { {numo_cDFloat, 1, out_shape} };
389
+ ndfunc_t ndf = { (na_iter_func_t)iter_find_split_params_reg, NO_LOOP, 3, 1, ain, aout };
390
+ split_opts_reg opts = { StringValuePtr(criterion), NUM2DBL(impurity) };
391
+ VALUE params = na_ndloop3(&ndf, &opts, 3, order, features, targets);
392
+ VALUE results = rb_ary_new2(4);
393
+ rb_ary_store(results, 0, DBL2NUM(((double*)na_get_pointer_for_read(params))[0]));
394
+ rb_ary_store(results, 1, DBL2NUM(((double*)na_get_pointer_for_read(params))[1]));
395
+ rb_ary_store(results, 2, DBL2NUM(((double*)na_get_pointer_for_read(params))[2]));
396
+ rb_ary_store(results, 3, DBL2NUM(((double*)na_get_pointer_for_read(params))[3]));
397
+ return results;
366
398
  }
367
399
 
368
400
  /**
@@ -487,7 +519,7 @@ node_impurity_cls(VALUE self, VALUE criterion, VALUE y_nary, VALUE n_elements_,
487
519
  * @overload node_impurity(criterion, y) -> Float
488
520
  *
489
521
  * @param criterion [String] The function to calculate impurity. Supported criteria are 'mae' and 'mse'.
490
- * @param y [Numo::DFloat] (shape: [n_samples, n_outputs]) The taget values.
522
+ * @param y [Array<Float>] (shape: [n_samples, n_outputs]) The target values.
491
523
  * @return [Float] impurity
492
524
  */
493
525
  static VALUE
@@ -495,8 +527,8 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
495
527
  {
496
528
  long i;
497
529
  const long n_elements = RARRAY_LEN(y);
498
- const long n_dimensions = RARRAY_LEN(rb_ary_entry(y, 0));
499
- VALUE sum_vec = create_zero_vector(n_dimensions);
530
+ const long n_outputs = RARRAY_LEN(rb_ary_entry(y, 0));
531
+ VALUE sum_vec = create_zero_vector(n_outputs);
500
532
  VALUE target_vecs = rb_ary_new();
501
533
  VALUE target;
502
534
 
@@ -506,7 +538,7 @@ node_impurity_reg(VALUE self, VALUE criterion, VALUE y)
506
538
  rb_ary_push(target_vecs, target);
507
539
  }
508
540
 
509
- return DBL2NUM(calc_impurity_reg(criterion, target_vecs, sum_vec));
541
+ return DBL2NUM(calc_impurity_reg(StringValuePtr(criterion), target_vecs, sum_vec));
510
542
  }
511
543
 
512
544
  void Init_rumale(void)
@@ -536,7 +568,7 @@ void Init_rumale(void)
536
568
  VALUE mExtGTreeReg = rb_define_module_under(mTree, "ExtGradientTreeRegressor");
537
569
 
538
570
  rb_define_private_method(mExtDTreeCls, "find_split_params", find_split_params_cls, 6);
539
- rb_define_private_method(mExtDTreeReg, "find_split_params", find_split_params_reg, 4);
571
+ rb_define_private_method(mExtDTreeReg, "find_split_params", find_split_params_reg, 5);
540
572
  rb_define_private_method(mExtGTreeReg, "find_split_params", find_split_params_grad_reg, 7);
541
573
  rb_define_private_method(mExtDTreeCls, "node_impurity", node_impurity_cls, 4);
542
574
  rb_define_private_method(mExtDTreeReg, "node_impurity", node_impurity_reg, 2);
@@ -57,6 +57,7 @@ require 'rumale/ensemble/random_forest_regressor'
57
57
  require 'rumale/ensemble/extra_trees_classifier'
58
58
  require 'rumale/ensemble/extra_trees_regressor'
59
59
  require 'rumale/clustering/k_means'
60
+ require 'rumale/clustering/k_medoids'
60
61
  require 'rumale/clustering/gaussian_mixture'
61
62
  require 'rumale/clustering/dbscan'
62
63
  require 'rumale/clustering/power_iteration'
@@ -0,0 +1,157 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/base/base_estimator'
4
+ require 'rumale/base/cluster_analyzer'
5
+ require 'rumale/pairwise_metric'
6
+
7
+ module Rumale
8
+ module Clustering
9
+ # KMedoids is a class that implements K-Medoids cluster analysis.
10
+ #
11
+ # @example
12
+ # analyzer = Rumale::Clustering::KMedoids.new(n_clusters: 10, max_iter: 50)
13
+ # cluster_labels = analyzer.fit_predict(samples)
14
+ #
15
+ # *Reference*
16
+ # - D. Arthur and S. Vassilvitskii, "k-means++: the advantages of careful seeding," Proc. SODA'07, pp. 1027--1035, 2007.
17
+ class KMedoids
18
+ include Base::BaseEstimator
19
+ include Base::ClusterAnalyzer
20
+
21
+ # Return the indices of medoids.
22
+ # @return [Numo::Int32] (shape: [n_clusters])
23
+ attr_reader :medoid_ids
24
+
25
+ # Return the random generator.
26
+ # @return [Random]
27
+ attr_reader :rng
28
+
29
+ # Create a new cluster analyzer with K-Medoids method.
30
+ #
31
+ # @param n_clusters [Integer] The number of clusters.
32
+ # @param metric [String] The metric to calculate the distances in original space.
33
+ # If metric is 'euclidean', Euclidean distance is calculated for distance in original space.
34
+ # If metric is 'precomputed', the fit and fit_predict methods expect to be given a distance matrix.
35
+ # @param init [String] The initialization method for centroids ('random' or 'k-means++').
36
+ # @param max_iter [Integer] The maximum number of iterations.
37
+ # @param tol [Float] The tolerance of termination criterion.
38
+ # @param random_seed [Integer] The seed value using to initialize the random generator.
39
+ def initialize(n_clusters: 8, metric: 'euclidean', init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
40
+ check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
41
+ check_params_float(tol: tol)
42
+ check_params_string(metric: metric, init: init)
43
+ check_params_type_or_nil(Integer, random_seed: random_seed)
44
+ check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
45
+ @params = {}
46
+ @params[:n_clusters] = n_clusters
47
+ @params[:metric] = metric == 'precomputed' ? 'precomputed' : 'euclidean'
48
+ @params[:init] = init == 'random' ? 'random' : 'k-means++'
49
+ @params[:max_iter] = max_iter
50
+ @params[:tol] = tol
51
+ @params[:random_seed] = random_seed
52
+ @params[:random_seed] ||= srand
53
+ @medoid_ids = nil
54
+ @cluster_centers = nil
55
+ @rng = Random.new(@params[:random_seed])
56
+ end
57
+
58
+ # Analyze clusters with given training data.
59
+ #
60
+ # @overload fit(x) -> KMedoids
61
+ #
62
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
63
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
64
+ # @return [KMedoids] The learned cluster analyzer itself.
65
+ def fit(x, _not_used = nil)
66
+ check_sample_array(x)
67
+ raise ArgumentError, 'Expect the input distance matrix to be square.' if @params[:metric] == 'precomputed' && x.shape[0] != x.shape[1]
68
+ # initialize some variables.
69
+ distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x)
70
+ init_cluster_centers(distance_mat)
71
+ error = distance_mat[true, @medoid_ids].mean
72
+ @params[:max_iter].times do |_t|
73
+ cluster_labels = assign_cluster(distance_mat[true, @medoid_ids])
74
+ @params[:n_clusters].times do |n|
75
+ assigned_ids = cluster_labels.eq(n).where
76
+ @medoid_ids[n] = assigned_ids[distance_mat[assigned_ids, assigned_ids].sum(axis: 1).min_index]
77
+ end
78
+ new_error = distance_mat[true, @medoid_ids].mean
79
+ break if (error - new_error).abs <= @params[:tol]
80
+ error = new_error
81
+ end
82
+ @cluster_centers = x[@medoid_ids, true].dup if @params[:metric] == 'euclidean'
83
+ self
84
+ end
85
+
86
+ # Predict cluster labels for samples.
87
+ #
88
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster label.
89
+ # If the metric is 'precomputed', x must be distances between samples and medoids (shape: [n_samples, n_clusters]).
90
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
91
+ def predict(x)
92
+ check_sample_array(x)
93
+ distance_mat = @params[:metric] == 'precomputed' ? x : Rumale::PairwiseMetric.euclidean_distance(x, @cluster_centers)
94
+ if @params[:metric] == 'precomputed' && distance_mat.shape[1] != @medoid_ids.size
95
+ raise ArgumentError, 'Expect the size input matrix to be n_samples-by-n_clusters.'
96
+ end
97
+ assign_cluster(distance_mat)
98
+ end
99
+
100
+ # Analyze clusters and assign samples to clusters.
101
+ #
102
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
103
+ # If the metric is 'precomputed', x must be a square distance matrix (shape: [n_samples, n_samples]).
104
+ # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
105
+ def fit_predict(x)
106
+ check_sample_array(x)
107
+ fit(x)
108
+ if @params[:metric] == 'precomputed'
109
+ predict(x[true, @medoid_ids])
110
+ else
111
+ predict(x)
112
+ end
113
+ end
114
+
115
+ # Dump marshal data.
116
+ # @return [Hash] The marshal data.
117
+ def marshal_dump
118
+ { params: @params,
119
+ medoid_ids: @medoid_ids,
120
+ cluster_centers: @cluster_centers,
121
+ rng: @rng }
122
+ end
123
+
124
+ # Load marshal data.
125
+ # @return [nil]
126
+ def marshal_load(obj)
127
+ @params = obj[:params]
128
+ @medoid_ids = obj[:medoid_ids]
129
+ @cluster_centers = obj[:cluster_centers]
130
+ @rng = obj[:rng]
131
+ nil
132
+ end
133
+
134
+ private
135
+
136
+ def assign_cluster(distances_to_medoids)
137
+ distances_to_medoids.min_index(axis: 1) - Numo::Int32[*0.step(distances_to_medoids.size - 1, @params[:n_clusters])]
138
+ end
139
+
140
+ def init_cluster_centers(distance_mat)
141
+ # random initialize
142
+ n_samples = distance_mat.shape[0]
143
+ sub_rng = @rng.dup
144
+ @medoid_ids = Numo::Int32.asarray([*0...n_samples].sample(@params[:n_clusters], random: sub_rng))
145
+ return unless @params[:init] == 'k-means++'
146
+ # k-means++ initialize
147
+ (1...@params[:n_clusters]).each do |n|
148
+ distances = distance_mat[true, @medoid_ids[0...n]]
149
+ min_distances = distances.flatten[distances.min_index(axis: 1)]
150
+ probs = min_distances**2 / (min_distances**2).sum
151
+ cum_probs = probs.cumsum
152
+ @medoid_ids[n] = cum_probs.gt(sub_rng.rand).where.to_a.first
153
+ end
154
+ end
155
+ end
156
+ end
157
+ end
@@ -126,11 +126,8 @@ module Rumale
126
126
  node
127
127
  end
128
128
 
129
- def best_split(features, y, whole_impurity)
130
- order = features.sort_index
131
- sorted_f = features[order].to_a
132
- sorted_y = y[order, true].to_a
133
- find_split_params(@params[:criterion], whole_impurity, sorted_f, sorted_y)
129
+ def best_split(f, y, impurity)
130
+ find_split_params(@params[:criterion], impurity, f.sort_index, f, y)
134
131
  end
135
132
 
136
133
  def impurity(y)
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.12.8'
6
+ VERSION = '0.12.9'
7
7
  end
@@ -19,7 +19,7 @@ Gem::Specification.new do |spec|
19
19
  Rumale currently supports Linear / Kernel Support Vector Machine,
20
20
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
21
21
  Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
22
- K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
22
+ K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
23
23
  Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
24
24
  MSG
25
25
  spec.homepage = 'https://github.com/yoshoku/rumale'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.8
4
+ version: 0.12.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-07-15 00:00:00.000000000 Z
11
+ date: 2019-07-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -114,7 +114,7 @@ description: |
114
114
  Rumale currently supports Linear / Kernel Support Vector Machine,
115
115
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
116
116
  Naive Bayes, Decision Tree, AdaBoost, Gradient Tree Boosting, Random Forest, Extra-Trees, K-nearest neighbor algorithm,
117
- K-Means, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
117
+ K-Means, K-Medoids, Gaussian Mixture Model, DBSCAN, Power Iteration Clustering,
118
118
  Multidimensional Scaling, t-SNE, Principal Component Analysis, and Non-negative Matrix Factorization.
119
119
  email:
120
120
  - yoshoku@outlook.com
@@ -150,6 +150,7 @@ files:
150
150
  - lib/rumale/clustering/dbscan.rb
151
151
  - lib/rumale/clustering/gaussian_mixture.rb
152
152
  - lib/rumale/clustering/k_means.rb
153
+ - lib/rumale/clustering/k_medoids.rb
153
154
  - lib/rumale/clustering/power_iteration.rb
154
155
  - lib/rumale/dataset.rb
155
156
  - lib/rumale/decomposition/nmf.rb