svmkit 0.5.2 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/README.md +2 -1
- data/lib/svmkit.rb +2 -0
- data/lib/svmkit/decomposition/nmf.rb +147 -0
- data/lib/svmkit/decomposition/pca.rb +150 -0
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +2 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d8486463886064e5aa5169dbed20101a01d01101483226cf96e38b377144e153
|
4
|
+
data.tar.gz: 79807cdbe9f10fba17cd91dbcd68c713180fd986e9b6fab575a849c762bb0d2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ec6c45c99bda82813644b7b4a043a4dda27810e24e10b4588182cbc3dea106eb05ac013354177861ab4abf552d89670ea1f0d39d50976a232feedc0daee9030
|
7
|
+
data.tar.gz: 373a566f294bab3d6fb232516f5720a877b6e20999c3253ae49044d759f83b06999fef4e259df91b950fe49c395042c1a74675fadb326460a7e65d54048ca325
|
data/HISTORY.md
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,8 @@ SVMKit provides machine learning algorithms with interfaces similar to Scikit-Le
|
|
10
10
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
11
11
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
12
12
|
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor classifier,
|
13
|
-
K-Means, DBSCAN
|
13
|
+
K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
|
14
|
+
and cross-validation.
|
14
15
|
|
15
16
|
## Installation
|
16
17
|
|
data/lib/svmkit.rb
CHANGED
@@ -39,6 +39,8 @@ require 'svmkit/ensemble/random_forest_classifier'
|
|
39
39
|
require 'svmkit/ensemble/random_forest_regressor'
|
40
40
|
require 'svmkit/clustering/k_means'
|
41
41
|
require 'svmkit/clustering/dbscan'
|
42
|
+
require 'svmkit/decomposition/pca'
|
43
|
+
require 'svmkit/decomposition/nmf'
|
42
44
|
require 'svmkit/preprocessing/l2_normalizer'
|
43
45
|
require 'svmkit/preprocessing/min_max_scaler'
|
44
46
|
require 'svmkit/preprocessing/standard_scaler'
|
# frozen_string_literal: true

require 'svmkit/validation'
require 'svmkit/base/base_estimator'
require 'svmkit/base/transformer'

module SVMKit
  module Decomposition
    # NMF is a class that implements Non-negative Matrix Factorization
    # with multiplicative update rules.
    #
    # @example
    #   decomposer = SVMKit::Decomposition::NMF.new(n_components: 2)
    #   representation = decomposer.fit_transform(samples)
    #
    # *Reference*
    # - W. Xu, X. Liu, and Y. Gong, "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
    class NMF
      include Base::BaseEstimator
      include Base::Transformer
      include Validation

      # Returns the factorization matrix.
      # @return [Numo::DFloat] (shape: [n_components, n_features])
      attr_reader :components

      # Return the random generator.
      # @return [Random]
      attr_reader :rng

      # Create a new transformer with NMF.
      #
      # @param n_components [Integer] The number of components.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param tol [Float] The tolerance of termination criterion.
      # @param eps [Float] A small value close to zero to avoid zero division error.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
        check_params_integer(n_components: n_components, max_iter: max_iter)
        check_params_float(tol: tol, eps: eps)
        check_params_type_or_nil(Integer, random_seed: random_seed)
        check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol, eps: eps)
        @params = {}
        @params[:n_components] = n_components
        @params[:max_iter] = max_iter
        @params[:tol] = tol
        @params[:eps] = eps
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @components = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @overload fit(x) -> NMF
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [NMF] The learned transformer itself.
      def fit(x, _y = nil)
        check_sample_array(x)
        partial_fit(x)
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
      def fit_transform(x, _y = nil)
        check_sample_array(x)
        partial_fit(x)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
      def transform(x)
        check_sample_array(x)
        # Keep the learned factorization matrix fixed and only optimize the coefficients.
        partial_fit(x, false)
      end

      # Inverse transform the given transformed data with the learned model.
      #
      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
      def inverse_transform(z)
        check_sample_array(z)
        z.dot(@components)
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          components: @components,
          rng: @rng }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @components = obj[:components]
        @rng = obj[:rng]
        nil
      end

      private

      # Run the multiplicative update optimization and return the coefficient matrix.
      # When update_comps is false, @components is left unchanged (used by #transform).
      def partial_fit(x, update_comps = true)
        # initialize some variables.
        n_samples, n_features = x.shape
        scale = Math.sqrt(x.mean / @params[:n_components])
        @components = rand_uniform([@params[:n_components], n_features]) * scale if update_comps
        coefficients = rand_uniform([n_samples, @params[:n_components]]) * scale
        # optimization.
        @params[:max_iter].times do
          # update
          if update_comps
            nume = coefficients.transpose.dot(x)
            deno = (coefficients.transpose.dot(coefficients)).dot(@components) + @params[:eps]
            @components *= (nume / deno)
          end
          nume = x.dot(@components.transpose)
          deno = (coefficients.dot(@components)).dot(@components.transpose) + @params[:eps]
          coefficients *= (nume / deno)
          # normalize
          norm = Numo::NMath.sqrt((@components**2).sum(1)) + @params[:eps]
          @components /= norm.expand_dims(1) if update_comps
          coefficients *= norm
          # check convergence
          err = ((x - coefficients.dot(@components))**2).sum(1).mean
          break if err < @params[:tol]
        end
        coefficients
      end

      # Generate a [shape[0], shape[1]] matrix of uniform random values in [0, 1).
      def rand_uniform(shape)
        rnd_vals = Array.new(shape.inject(:*)) { @rng.rand }
        Numo::DFloat.asarray(rnd_vals).reshape(shape[0], shape[1])
      end
    end
  end
end
# frozen_string_literal: true

require 'svmkit/validation'
require 'svmkit/base/base_estimator'
require 'svmkit/base/transformer'

module SVMKit
  # Module for matrix decomposition algorithms.
  module Decomposition
    # PCA is a class that implements Principal Component Analysis
    # with a fixed-point iteration algorithm.
    #
    # @example
    #   decomposer = SVMKit::Decomposition::PCA.new(n_components: 2)
    #   representation = decomposer.fit_transform(samples)
    #
    # *Reference*
    # - A. Sharma and K. K. Paliwal, "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
    class PCA
      include Base::BaseEstimator
      include Base::Transformer
      include Validation

      # Returns the principal components.
      # @return [Numo::DFloat] (shape: [n_components, n_features])
      attr_reader :components

      # Returns the mean vector.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :mean

      # Return the random generator.
      # @return [Random]
      attr_reader :rng

      # Create a new transformer with PCA.
      #
      # @param n_components [Integer] The number of principal components.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param tol [Float] The tolerance of termination criterion.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_components: 2, max_iter: 100, tol: 1.0e-4, random_seed: nil)
        check_params_integer(n_components: n_components, max_iter: max_iter)
        check_params_float(tol: tol)
        check_params_type_or_nil(Integer, random_seed: random_seed)
        check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
        @params = {}
        @params[:n_components] = n_components
        @params[:max_iter] = max_iter
        @params[:tol] = tol
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @components = nil
        @mean = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @overload fit(x) -> PCA
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [PCA] The learned transformer itself.
      def fit(x, _y = nil)
        check_sample_array(x)
        # initialize some variables.
        @components = nil
        n_samples, n_features = x.shape
        # centering.
        @mean = x.mean(0)
        centered_x = x - @mean
        # optimization.
        covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
        @params[:n_components].times do
          comp_vec = random_vec(n_features)
          @params[:max_iter].times do
            updated = orthogonalize(covariance_mat.dot(comp_vec))
            # a unit eigenvector satisfies updated.dot(comp_vec) == 1 at convergence.
            break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
            comp_vec = updated
          end
          @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
        end
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
      def fit_transform(x, _y = nil)
        check_sample_array(x)
        fit(x).transform(x)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
      def transform(x)
        check_sample_array(x)
        (x - @mean).dot(@components.transpose)
      end

      # Inverse transform the given transformed data with the learned model.
      #
      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
      def inverse_transform(z)
        check_sample_array(z)
        # with n_components == 1, @components is a vector; lift it to a 1-row matrix.
        c = @components.shape[1].nil? ? @components.expand_dims(0) : @components
        z.dot(c) + @mean
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          components: @components,
          mean: @mean,
          rng: @rng }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @components = obj[:components]
        @mean = obj[:mean]
        @rng = obj[:rng]
        nil
      end

      private

      # Deflate the already-found components out of pcvec (Gram-Schmidt step),
      # then normalize it to unit length.
      def orthogonalize(pcvec)
        unless @components.nil?
          delta = @components.dot(pcvec) * @components.transpose
          delta = delta.sum(1) unless delta.shape[1].nil?
          pcvec -= delta
        end
        # The small constant belongs in the denominator to guard against division
        # by zero; without the parentheses it was erroneously added to the result.
        pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
      end

      # Generate a random vector of n_features uniform values in [0, 1).
      def random_vec(n_features)
        Numo::DFloat[*(Array.new(n_features) { @rng.rand })]
      end
    end
  end
end
data/lib/svmkit/version.rb
CHANGED
data/svmkit.gemspec
CHANGED
@@ -18,7 +18,8 @@ SVMKit provides machine learning algorithms with interfaces similar to Scikit-Le
|
|
18
18
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
19
19
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
20
20
|
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
21
|
-
K-Means, DBSCAN
|
21
|
+
K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
|
22
|
+
and cross-validation.
|
22
23
|
MSG
|
23
24
|
spec.homepage = 'https://github.com/yoshoku/svmkit'
|
24
25
|
spec.license = 'BSD-2-Clause'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -86,7 +86,8 @@ description: |
|
|
86
86
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
87
87
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
88
88
|
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
89
|
-
K-Means, DBSCAN
|
89
|
+
K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
|
90
|
+
and cross-validation.
|
90
91
|
email:
|
91
92
|
- yoshoku@outlook.com
|
92
93
|
executables: []
|
@@ -118,6 +119,8 @@ files:
|
|
118
119
|
- lib/svmkit/clustering/dbscan.rb
|
119
120
|
- lib/svmkit/clustering/k_means.rb
|
120
121
|
- lib/svmkit/dataset.rb
|
122
|
+
- lib/svmkit/decomposition/nmf.rb
|
123
|
+
- lib/svmkit/decomposition/pca.rb
|
121
124
|
- lib/svmkit/ensemble/random_forest_classifier.rb
|
122
125
|
- lib/svmkit/ensemble/random_forest_regressor.rb
|
123
126
|
- lib/svmkit/evaluation_measure/accuracy.rb
|