svmkit 0.4.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/.rubocop_todo.yml +15 -16
- data/Gemfile +1 -1
- data/HISTORY.md +5 -1
- data/README.md +2 -1
- data/Rakefile +3 -3
- data/lib/svmkit.rb +4 -0
- data/lib/svmkit/base/cluster_analyzer.rb +29 -0
- data/lib/svmkit/clustering/k_means.rb +138 -0
- data/lib/svmkit/evaluation_measure/normalized_mutual_information.rb +63 -0
- data/lib/svmkit/evaluation_measure/purity.rb +41 -0
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +2 -2
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3be3dae5adddfa8bf3655f983082f64601056ce2097671f97873f36f062eea15
|
4
|
+
data.tar.gz: 44bb40d0ec91975d6e4948567f95103434f5792fb4a2be2b87b18079b0b7bb00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a009b9403935760033ea14c2e7a3027953d28f38f27c3952f49ed69c035eea94ab7305dce4c4a9b3e688f9894eeb3f8511863c1f71640735d16f73e3a1afafe6
|
7
|
+
data.tar.gz: ad9e8198c88047aad39e4caf95872c1616d1cdb94272f8044af621f7eb4990378693a7e0f3073ed0a7dbad3e2e22d7d46055fdb2795c3287ef23fa7efc7ea9d1
|
data/.gitignore
CHANGED
data/.rubocop_todo.yml
CHANGED
@@ -1,19 +1,18 @@
|
|
1
1
|
# This configuration was generated by
|
2
2
|
# `rubocop --auto-gen-config`
|
3
|
-
# on 2018-
|
3
|
+
# on 2018-06-10 12:21:53 +0900 using RuboCop version 0.57.1.
|
4
4
|
# The point is for the user to remove these configuration records
|
5
5
|
# one by one as the offenses are removed from the code base.
|
6
6
|
# Note that changes in the inspected code, or installation of new
|
7
7
|
# versions of RuboCop, may require this file to be generated again.
|
8
8
|
|
9
|
-
# Offense count:
|
10
|
-
#
|
11
|
-
|
12
|
-
Gemspec/RequiredRubyVersion:
|
9
|
+
# Offense count: 2
|
10
|
+
# Cop supports --auto-correct.
|
11
|
+
Layout/ClosingHeredocIndentation:
|
13
12
|
Exclude:
|
14
13
|
- 'svmkit.gemspec'
|
15
14
|
|
16
|
-
# Offense count:
|
15
|
+
# Offense count: 2
|
17
16
|
# Cop supports --auto-correct.
|
18
17
|
# Configuration parameters: EnforcedStyle.
|
19
18
|
# SupportedStyles: auto_detection, squiggly, active_support, powerpack, unindent
|
@@ -21,24 +20,24 @@ Layout/IndentHeredoc:
|
|
21
20
|
Exclude:
|
22
21
|
- 'svmkit.gemspec'
|
23
22
|
|
23
|
+
# Offense count: 1
|
24
|
+
# Cop supports --auto-correct.
|
25
|
+
Layout/LeadingBlankLines:
|
26
|
+
Exclude:
|
27
|
+
- 'svmkit.gemspec'
|
28
|
+
|
24
29
|
# Offense count: 1
|
25
30
|
# Configuration parameters: CountComments, ExcludedMethods.
|
26
31
|
Metrics/BlockLength:
|
27
|
-
Max:
|
32
|
+
Max: 29
|
28
33
|
|
29
|
-
# Offense count:
|
34
|
+
# Offense count: 3
|
30
35
|
Metrics/CyclomaticComplexity:
|
31
36
|
Max: 12
|
32
37
|
|
33
|
-
# Offense count:
|
38
|
+
# Offense count: 3
|
34
39
|
Metrics/PerceivedComplexity:
|
35
|
-
Max:
|
36
|
-
|
37
|
-
# Offense count: 1
|
38
|
-
# Cop supports --auto-correct.
|
39
|
-
Style/Encoding:
|
40
|
-
Exclude:
|
41
|
-
- 'svmkit.gemspec'
|
40
|
+
Max: 13
|
42
41
|
|
43
42
|
# Offense count: 1
|
44
43
|
# Cop supports --auto-correct.
|
data/Gemfile
CHANGED
data/HISTORY.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
# 0.5.0
|
2
|
+
- Add class for K-Means clustering.
|
3
|
+
- Add class for evaluating purity.
|
4
|
+
- Add class for evaluating normalized mutual information.
|
5
|
+
|
1
6
|
# 0.4.1
|
2
7
|
- Add class for linear regressor.
|
3
8
|
- Add class for SGD optimizer.
|
@@ -26,7 +31,6 @@ SVMKit plans to add other optimizer algorithms sequentially, so that users can s
|
|
26
31
|
- Remove learning_rate, decay, and momentum parameters on Ridge, Lasso, and FactorizationMachineRegressor.
|
27
32
|
- Remove normalize parameter on SVC, SVR, and LogisticRegression.
|
28
33
|
|
29
|
-
|
30
34
|
# 0.3.3
|
31
35
|
- Add class for Ridge regressor.
|
32
36
|
- Add class for Lasso regressor.
|
data/README.md
CHANGED
@@ -9,7 +9,8 @@ SVMKit is a machine learninig library in Ruby.
|
|
9
9
|
SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
10
10
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
11
11
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
12
|
-
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor classifier,
|
12
|
+
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor classifier,
|
13
|
+
K-Means and cross-validation.
|
13
14
|
|
14
15
|
## Installation
|
15
16
|
|
data/Rakefile
CHANGED
data/lib/svmkit.rb
CHANGED
@@ -10,6 +10,7 @@ require 'svmkit/probabilistic_output'
|
|
10
10
|
require 'svmkit/base/base_estimator'
|
11
11
|
require 'svmkit/base/classifier'
|
12
12
|
require 'svmkit/base/regressor'
|
13
|
+
require 'svmkit/base/cluster_analyzer'
|
13
14
|
require 'svmkit/base/transformer'
|
14
15
|
require 'svmkit/base/splitter'
|
15
16
|
require 'svmkit/base/evaluator'
|
@@ -36,6 +37,7 @@ require 'svmkit/tree/decision_tree_classifier'
|
|
36
37
|
require 'svmkit/tree/decision_tree_regressor'
|
37
38
|
require 'svmkit/ensemble/random_forest_classifier'
|
38
39
|
require 'svmkit/ensemble/random_forest_regressor'
|
40
|
+
require 'svmkit/clustering/k_means'
|
39
41
|
require 'svmkit/preprocessing/l2_normalizer'
|
40
42
|
require 'svmkit/preprocessing/min_max_scaler'
|
41
43
|
require 'svmkit/preprocessing/standard_scaler'
|
@@ -52,3 +54,5 @@ require 'svmkit/evaluation_measure/log_loss'
|
|
52
54
|
require 'svmkit/evaluation_measure/r2_score'
|
53
55
|
require 'svmkit/evaluation_measure/mean_squared_error'
|
54
56
|
require 'svmkit/evaluation_measure/mean_absolute_error'
|
57
|
+
require 'svmkit/evaluation_measure/purity'
|
58
|
+
require 'svmkit/evaluation_measure/normalized_mutual_information'
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'svmkit/validation'
|
4
|
+
require 'svmkit/evaluation_measure/purity'
|
5
|
+
|
6
|
+
module SVMKit
|
7
|
+
module Base
|
8
|
+
# Module for all clustering algorithms in SVMKit.
|
9
|
+
module ClusterAnalyzer
|
10
|
+
# An abstract method for analyzing clusters and predicting cluster indices.
|
11
|
+
def fit_predict(*_args)
  # Accept (and ignore) any arguments: score calls fit_predict(x), so a zero-arity
  # placeholder would fail with ArgumentError before this explanatory error is raised
  # when an including class has not provided its own implementation.
  raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
end
|
14
|
+
|
15
|
+
# Calculate purity of clustering result.
|
16
|
+
#
|
17
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) Testing data.
|
18
|
+
# @param y [Numo::Int32] (shape: [n_samples]) True labels for testing data.
|
19
|
+
# @return [Float] Purity
|
20
|
+
def score(x, y)
  # Run the library's validation helpers on the samples, the labels, and the pair.
  validator = SVMKit::Validation
  validator.check_sample_array(x)
  validator.check_label_array(y)
  validator.check_sample_label_size(x, y)
  # Purity is evaluated between the given labels and the cluster indices from fit_predict.
  purity = SVMKit::EvaluationMeasure::Purity.new
  cluster_ids = fit_predict(x)
  purity.score(y, cluster_ids)
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'svmkit/validation'
|
4
|
+
require 'svmkit/base/base_estimator'
|
5
|
+
require 'svmkit/base/cluster_analyzer'
|
6
|
+
require 'svmkit/pairwise_metric'
|
7
|
+
|
8
|
+
module SVMKit
|
9
|
+
# This module consists of classes that implement cluster analysis methods.
|
10
|
+
module Clustering
|
11
|
+
# KMeans is a class that implements K-Means cluster analysis.
|
12
|
+
#
|
13
|
+
# @example
|
14
|
+
# analyzer = SVMKit::Clustering::KMeans.new(n_clusters: 10, max_iter: 50)
|
15
|
+
# cluster_ids = analyzer.fit_predict(samples)
|
16
|
+
#
|
17
|
+
# *Reference*
|
18
|
+
# - D. Arthur and S. Vassilvitskii, "k-means++: the advantages of careful seeding," Proc. SODA'07, pp. 1027--1035, 2007.
|
19
|
+
class KMeans
|
20
|
+
include Base::BaseEstimator
|
21
|
+
include Base::ClusterAnalyzer
|
22
|
+
include Validation
|
23
|
+
|
24
|
+
# Return the centroids.
|
25
|
+
# @return [Numo::DFloat] (shape: [n_clusters, n_features])
|
26
|
+
attr_reader :cluster_centers
|
27
|
+
|
28
|
+
# Return the random generator.
|
29
|
+
# @return [Random]
|
30
|
+
attr_reader :rng
|
31
|
+
|
32
|
+
# Create a new cluster analyzer with K-Means method.
|
33
|
+
#
|
34
|
+
# @param n_clusters [Integer] The number of clusters.
|
35
|
+
# @param init [String] The initialization method for centroids ('random' or 'k-means++').
|
36
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
37
|
+
# @param tol [Float] The tolerance of termination criterion.
|
38
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
39
|
+
def initialize(n_clusters: 8, init: 'k-means++', max_iter: 50, tol: 1.0e-4, random_seed: nil)
  # Type checks come first, then the positivity check on the integer parameters.
  check_params_integer(n_clusters: n_clusters, max_iter: max_iter)
  check_params_string(init: init)
  check_params_type_or_nil(Integer, random_seed: random_seed)
  check_params_positive(n_clusters: n_clusters, max_iter: max_iter)
  @params = {
    n_clusters: n_clusters,
    # Any value other than 'random' selects 'k-means++'.
    init: (init == 'random' ? 'random' : 'k-means++'),
    max_iter: max_iter,
    tol: tol,
    # Without an explicit seed, the value returned by Kernel#srand is stored instead.
    random_seed: random_seed || srand
  }
  @cluster_centers = nil
  @rng = Random.new(@params[:random_seed])
end
|
54
|
+
|
55
|
+
# Analyze clusters with the given training data.
|
56
|
+
#
|
57
|
+
# @overload fit(x) -> KMeans
|
58
|
+
#
|
59
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
|
60
|
+
# @return [KMeans] The learned cluster analyzer itself.
|
61
|
+
def fit(x, _y = nil)
  check_sample_array(x)
  init_cluster_centers(x)
  @params[:max_iter].times do
    assignments = assign_cluster(x)
    previous_centers = @cluster_centers.dup
    # Move every centroid to the mean of its assigned samples; a centroid without
    # any assigned sample keeps its current position.
    (0...@params[:n_clusters]).each do |cluster_id|
      membership = assignments.eq(cluster_id)
      next unless membership.count > 0
      @cluster_centers[cluster_id, true] = x[membership.where, true].mean(axis: 0)
    end
    # Terminate once the mean Euclidean displacement of the centroids is within tol.
    shift = Numo::NMath.sqrt(((previous_centers - @cluster_centers)**2).sum(axis: 1)).mean
    break if shift <= @params[:tol]
  end
  self
end
|
76
|
+
|
77
|
+
# Predict cluster indices for samples.
|
78
|
+
#
|
79
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the cluster index.
|
80
|
+
# @return [Numo::Int32] (shape: [n_samples]) Predicted cluster index per sample.
|
81
|
+
def predict(x)
  check_sample_array(x)
  # Each sample receives the index chosen by assign_cluster for the current centroids.
  cluster_ids = assign_cluster(x)
  cluster_ids
end
|
85
|
+
|
86
|
+
# Analyze clusters and assign samples to clusters.
|
87
|
+
#
|
88
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
|
89
|
+
# @return [Numo::Int32] (shape: [n_samples]) Predicted cluster index per sample.
|
90
|
+
def fit_predict(x)
  check_sample_array(x)
  # fit returns the analyzer itself, so the prediction is chained onto its result.
  fit(x).predict(x)
end
|
95
|
+
|
96
|
+
# Dump marshal data.
|
97
|
+
# @return [Hash] The marshal data.
|
98
|
+
def marshal_dump
  # Collect the hyperparameters, the centroids, and the random generator.
  state = {}
  state[:params] = @params
  state[:cluster_centers] = @cluster_centers
  state[:rng] = @rng
  state
end
|
103
|
+
|
104
|
+
# Load marshal data.
|
105
|
+
# @return [nil]
|
106
|
+
def marshal_load(obj)
  # Restore the three pieces of state stored under the keys written by marshal_dump.
  @params, @cluster_centers, @rng = obj.values_at(:params, :cluster_centers, :rng)
  nil
end
|
112
|
+
|
113
|
+
private
|
114
|
+
|
115
|
+
def assign_cluster(x)
  distances = PairwiseMetric.euclidean_distance(x, @cluster_centers)
  n_centers = @cluster_centers.shape[0]
  # The row-wise min_index values are positions in the flattened matrix, so the
  # starting offset of each row is subtracted to obtain a per-row column index.
  row_offsets = Numo::Int32[*0.step(distances.size - 1, n_centers)]
  distances.min_index(axis: 1) - row_offsets
end
|
119
|
+
|
120
|
+
def init_cluster_centers(x)
  # random initialize: pick distinct sample rows as the starting centroids.
  n_samples = x.shape[0]
  rand_id = [*0...n_samples].sample(@params[:n_clusters], random: @rng)
  @cluster_centers = x[rand_id, true].dup
  return unless @params[:init] == 'k-means++'
  # k-means++ initialize: keep the first centroid and redraw each following one with
  # probability proportional to the squared distance to its nearest chosen centroid.
  (1...@params[:n_clusters]).each do |n|
    distance_matrix = PairwiseMetric.euclidean_distance(x, @cluster_centers[0...n, true])
    # Distance from every sample to the closest of the first n centroids.
    min_distances = distance_matrix.min(axis: 1)
    sq_distances = min_distances**2
    cum_probs = (sq_distances / sq_distances.sum).cumsum
    selected_id = cum_probs.gt(@rng.rand).where.to_a.first
    # No entry exceeds the draw when rounding leaves the last cumulative value below it,
    # or when all squared distances are zero (the division then yields NaN). `first`
    # returns nil in that case, so fall back to a uniformly drawn sample index.
    selected_id ||= @rng.rand(n_samples)
    @cluster_centers[n, true] = x[selected_id, true].dup
  end
end
|
136
|
+
end
|
137
|
+
end
|
138
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'svmkit/validation'
|
4
|
+
require 'svmkit/base/evaluator'
|
5
|
+
|
6
|
+
module SVMKit
|
7
|
+
module EvaluationMeasure
|
8
|
+
# NormalizedMutualInformation is a class that calculates the normalized mutual information of clustering results.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# evaluator = SVMKit::EvaluationMeasure::NormalizedMutualInformation.new
|
12
|
+
# puts evaluator.score(ground_truth, predicted)
|
13
|
+
#
|
14
|
+
# *Reference*
|
15
|
+
# - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
16
|
+
# - N X. Vinh, J. Epps, and J. Bailey, "Information Theoretic Measures for Clusterings Comparison: Variants, Properties, Normalization and Correction for Chance," J. Machine Learning Research, vol. 11, pp. 2837--2854, 2010.
|
17
|
+
class NormalizedMutualInformation
|
18
|
+
include Base::Evaluator
|
19
|
+
|
20
|
+
# Calculate normalized mutual information.
|
21
|
+
#
|
22
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
23
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
|
24
|
+
# @return [Float] Normalized mutual information
|
25
|
+
def score(y_true, y_pred)
  SVMKit::Validation.check_label_array(y_true)
  SVMKit::Validation.check_label_array(y_pred)
  # initialize some variables.
  n_samples = y_pred.size
  # Without samples there is nothing to compare; report 0.0 like the other degenerate cases.
  return 0.0 if n_samples.zero?
  true_labels = y_true.to_a
  pred_labels = y_pred.to_a
  # Count class sizes, cluster sizes, and (cluster, class) co-occurrences once, instead of
  # rebuilding index lists for every class/cluster pair. Hash insertion order follows the
  # first appearance of each label, i.e. the same order that `uniq` would produce.
  class_sizes = Hash.new(0)
  true_labels.each { |j| class_sizes[j] += 1 }
  cluster_sizes = Hash.new(0)
  pred_labels.each { |k| cluster_sizes[k] += 1 }
  joint_sizes = Hash.new(0)
  pred_labels.zip(true_labels) { |k, j| joint_sizes[[k, j]] += 1 }
  # calculate entropy (natural logarithm) of a partition given its group sizes.
  entropy = lambda do |sizes|
    terms = sizes.values.map do |size|
      ratio = size.fdiv(n_samples)
      ratio * Math.log(ratio)
    end
    -1.0 * terms.reduce(0.0, :+)
  end
  class_entropy = entropy.call(class_sizes)
  return 0.0 if class_entropy.zero?
  cluster_entropy = entropy.call(cluster_sizes)
  return 0.0 if cluster_entropy.zero?
  # calculate mutual information; empty intersections contribute nothing.
  mutual_information = 0.0
  cluster_sizes.each do |k, n_pr_samples|
    class_sizes.each do |j, n_tr_samples|
      n_intr_samples = joint_sizes.fetch([k, j], 0)
      next unless n_intr_samples > 0
      mutual_information +=
        n_intr_samples.fdiv(n_samples) * Math.log((n_samples * n_intr_samples).fdiv(n_pr_samples * n_tr_samples))
    end
  end
  # return normalized mutual information (normalized by the geometric mean of the entropies).
  mutual_information / Math.sqrt(class_entropy * cluster_entropy)
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'svmkit/validation'
|
4
|
+
require 'svmkit/base/evaluator'
|
5
|
+
|
6
|
+
module SVMKit
|
7
|
+
module EvaluationMeasure
|
8
|
+
# Purity is a class that calculates the purity of clustering results.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# evaluator = SVMKit::EvaluationMeasure::Purity.new
|
12
|
+
# puts evaluator.score(ground_truth, predicted)
|
13
|
+
#
|
14
|
+
# *Reference*
|
15
|
+
# - C D. Manning, P. Raghavan, and H. Schutze, "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
16
|
+
class Purity
|
17
|
+
include Base::Evaluator
|
18
|
+
|
19
|
+
# Calculate purity
|
20
|
+
#
|
21
|
+
# @param y_true [Numo::Int32] (shape: [n_samples]) Ground truth labels.
|
22
|
+
# @param y_pred [Numo::Int32] (shape: [n_samples]) Predicted cluster labels.
|
23
|
+
# @return [Float] Purity
|
24
|
+
def score(y_true, y_pred)
  SVMKit::Validation.check_label_array(y_true)
  SVMKit::Validation.check_label_array(y_pred)
  # initialize some variables.
  n_samples = y_pred.size
  true_labels = y_true.to_a
  # For every cluster, count the ground-truth labels of its members in a single pass,
  # rather than intersecting index lists for each cluster/class pair.
  label_counts = Hash.new { |table, cluster_id| table[cluster_id] = Hash.new(0) }
  y_pred.to_a.each_with_index do |cluster_id, i|
    counts = label_counts[cluster_id]
    # Positions without a ground-truth label cannot match any class.
    counts[true_labels[i]] += 1 if i < true_labels.size
  end
  # calculate purity: sum the size of the majority class of each cluster.
  purity = label_counts.values.map { |counts| counts.values.max || 0 }.reduce(0, :+)
  purity.fdiv(n_samples)
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/lib/svmkit/version.rb
CHANGED
data/svmkit.gemspec
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
|
2
1
|
lib = File.expand_path('lib', __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require 'svmkit/version'
|
@@ -18,7 +17,8 @@ SVMKit is a machine learninig library in Ruby.
|
|
18
17
|
SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
19
18
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
20
19
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
21
|
-
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
20
|
+
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
21
|
+
K-Means and cross-validation.
|
22
22
|
MSG
|
23
23
|
spec.homepage = 'https://github.com/yoshoku/svmkit'
|
24
24
|
spec.license = 'BSD-2-Clause'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -85,7 +85,8 @@ description: |
|
|
85
85
|
SVMKit provides machine learning algorithms with interfaces similar to Scikit-Learn in Python.
|
86
86
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
87
87
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
88
|
-
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
88
|
+
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
89
|
+
K-Means and cross-validation.
|
89
90
|
email:
|
90
91
|
- yoshoku@outlook.com
|
91
92
|
executables: []
|
@@ -109,10 +110,12 @@ files:
|
|
109
110
|
- lib/svmkit.rb
|
110
111
|
- lib/svmkit/base/base_estimator.rb
|
111
112
|
- lib/svmkit/base/classifier.rb
|
113
|
+
- lib/svmkit/base/cluster_analyzer.rb
|
112
114
|
- lib/svmkit/base/evaluator.rb
|
113
115
|
- lib/svmkit/base/regressor.rb
|
114
116
|
- lib/svmkit/base/splitter.rb
|
115
117
|
- lib/svmkit/base/transformer.rb
|
118
|
+
- lib/svmkit/clustering/k_means.rb
|
116
119
|
- lib/svmkit/dataset.rb
|
117
120
|
- lib/svmkit/ensemble/random_forest_classifier.rb
|
118
121
|
- lib/svmkit/ensemble/random_forest_regressor.rb
|
@@ -121,8 +124,10 @@ files:
|
|
121
124
|
- lib/svmkit/evaluation_measure/log_loss.rb
|
122
125
|
- lib/svmkit/evaluation_measure/mean_absolute_error.rb
|
123
126
|
- lib/svmkit/evaluation_measure/mean_squared_error.rb
|
127
|
+
- lib/svmkit/evaluation_measure/normalized_mutual_information.rb
|
124
128
|
- lib/svmkit/evaluation_measure/precision.rb
|
125
129
|
- lib/svmkit/evaluation_measure/precision_recall.rb
|
130
|
+
- lib/svmkit/evaluation_measure/purity.rb
|
126
131
|
- lib/svmkit/evaluation_measure/r2_score.rb
|
127
132
|
- lib/svmkit/evaluation_measure/recall.rb
|
128
133
|
- lib/svmkit/kernel_approximation/rbf.rb
|