svmkit 0.5.2 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/HISTORY.md +4 -0
- data/README.md +2 -1
- data/lib/svmkit.rb +2 -0
- data/lib/svmkit/decomposition/nmf.rb +147 -0
- data/lib/svmkit/decomposition/pca.rb +150 -0
- data/lib/svmkit/version.rb +1 -1
- data/svmkit.gemspec +2 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d8486463886064e5aa5169dbed20101a01d01101483226cf96e38b377144e153
|
4
|
+
data.tar.gz: 79807cdbe9f10fba17cd91dbcd68c713180fd986e9b6fab575a849c762bb0d2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2ec6c45c99bda82813644b7b4a043a4dda27810e24e10b4588182cbc3dea106eb05ac013354177861ab4abf552d89670ea1f0d39d50976a232feedc0daee9030
|
7
|
+
data.tar.gz: 373a566f294bab3d6fb232516f5720a877b6e20999c3253ae49044d759f83b06999fef4e259df91b950fe49c395042c1a74675fadb326460a7e65d54048ca325
|
data/HISTORY.md
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,8 @@ SVMKit provides machine learning algorithms with interfaces similar to Scikit-Le
|
|
10
10
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
11
11
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
12
12
|
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor classifier,
|
13
|
-
K-Means, DBSCAN
|
13
|
+
K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
|
14
|
+
and cross-validation.
|
14
15
|
|
15
16
|
## Installation
|
16
17
|
|
data/lib/svmkit.rb
CHANGED
@@ -39,6 +39,8 @@ require 'svmkit/ensemble/random_forest_classifier'
|
|
39
39
|
require 'svmkit/ensemble/random_forest_regressor'
|
40
40
|
require 'svmkit/clustering/k_means'
|
41
41
|
require 'svmkit/clustering/dbscan'
|
42
|
+
require 'svmkit/decomposition/pca'
|
43
|
+
require 'svmkit/decomposition/nmf'
|
42
44
|
require 'svmkit/preprocessing/l2_normalizer'
|
43
45
|
require 'svmkit/preprocessing/min_max_scaler'
|
44
46
|
require 'svmkit/preprocessing/standard_scaler'
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'svmkit/validation'
|
4
|
+
require 'svmkit/base/base_estimator'
|
5
|
+
require 'svmkit/base/transformer'
|
6
|
+
|
7
|
+
module SVMKit
|
8
|
+
module Decomposition
|
9
|
+
# NMF is a class that implements Non-negative Matrix Factorization.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# decomposer = SVMKit::Decomposition::NMF.new(n_components: 2)
|
13
|
+
# representation = decomposer.fit_transform(samples)
|
14
|
+
#
|
15
|
+
# *Reference*
|
16
|
+
# - W. Xu, X. Liu, and Y. Gong, "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR '03, pp. 267--273, 2003.
|
17
|
+
class NMF
|
18
|
+
include Base::BaseEstimator
|
19
|
+
include Base::Transformer
|
20
|
+
include Validation
|
21
|
+
|
22
|
+
# Returns the factorization matrix.
|
23
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
24
|
+
attr_reader :components
|
25
|
+
|
26
|
+
# Returns the random generator.
|
27
|
+
# @return [Random]
|
28
|
+
attr_reader :rng
|
29
|
+
|
30
|
+
# Create a new transformer with NMF.
|
31
|
+
#
|
32
|
+
# @param n_components [Integer] The number of components.
|
33
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
34
|
+
# @param tol [Float] The tolerance of termination criterion.
|
35
|
+
# @param eps [Float] A small value close to zero to avoid zero division error.
|
36
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
37
|
+
def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
|
38
|
+
check_params_integer(n_components: n_components, max_iter: max_iter)
|
39
|
+
check_params_float(tol: tol, eps: eps)
|
40
|
+
check_params_type_or_nil(Integer, random_seed: random_seed)
|
41
|
+
check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol, eps: eps)
|
42
|
+
@params = {}
|
43
|
+
@params[:n_components] = n_components
|
44
|
+
@params[:max_iter] = max_iter
|
45
|
+
@params[:tol] = tol
|
46
|
+
@params[:eps] = eps
|
47
|
+
@params[:random_seed] = random_seed
|
48
|
+
@params[:random_seed] ||= srand
|
49
|
+
@components = nil
|
50
|
+
@rng = Random.new(@params[:random_seed])
|
51
|
+
end
|
52
|
+
|
53
|
+
# Fit the model with given training data.
|
54
|
+
#
|
55
|
+
# @overload fit(x) -> NMF
|
56
|
+
#
|
57
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
58
|
+
# @return [NMF] The learned transformer itself.
|
59
|
+
def fit(x, _y = nil)
|
60
|
+
check_sample_array(x)
|
61
|
+
partial_fit(x)
|
62
|
+
self
|
63
|
+
end
|
64
|
+
|
65
|
+
# Fit the model with training data, and then transform them with the learned model.
|
66
|
+
#
|
67
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
68
|
+
#
|
69
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
70
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
71
|
+
def fit_transform(x, _y = nil)
|
72
|
+
check_sample_array(x)
|
73
|
+
partial_fit(x)
|
74
|
+
end
|
75
|
+
|
76
|
+
# Transform the given data with the learned model.
|
77
|
+
#
|
78
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
79
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
80
|
+
def transform(x)
|
81
|
+
check_sample_array(x)
|
82
|
+
partial_fit(x, false)
|
83
|
+
end
|
84
|
+
|
85
|
+
# Inverse transform the given transformed data with the learned model.
|
86
|
+
#
|
87
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
|
88
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
|
89
|
+
def inverse_transform(z)
|
90
|
+
check_sample_array(z)
|
91
|
+
z.dot(@components)
|
92
|
+
end
|
93
|
+
|
94
|
+
# Dump marshal data.
|
95
|
+
# @return [Hash] The marshal data.
|
96
|
+
def marshal_dump
|
97
|
+
{ params: @params,
|
98
|
+
components: @components,
|
99
|
+
rng: @rng }
|
100
|
+
end
|
101
|
+
|
102
|
+
# Load marshal data.
|
103
|
+
# @return [nil]
|
104
|
+
def marshal_load(obj)
|
105
|
+
@params = obj[:params]
|
106
|
+
@components = obj[:components]
|
107
|
+
@rng = obj[:rng]
|
108
|
+
nil
|
109
|
+
end
|
110
|
+
|
111
|
+
private
|
112
|
+
|
113
|
+
# Run the multiplicative-update optimization of NMF on the given data.
#
# When +update_comps+ is true, @components is re-initialized from the random
# generator and optimized together with the coefficients (fitting).
# When it is false, @components is left untouched and only the coefficients
# are optimized against it (transforming with an already learned model).
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be factorized.
# @param update_comps [Boolean] Whether to (re)learn @components.
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The coefficients for x.
def partial_fit(x, update_comps = true)
  # initialize some variables.
  n_samples, n_features = x.shape
  scale = Math.sqrt(x.mean / @params[:n_components])
  @components = rand_uniform([@params[:n_components], n_features]) * scale if update_comps
  coefficients = rand_uniform([n_samples, @params[:n_components]]) * scale
  # optimization.
  @params[:max_iter].times do
    # update the components only while fitting.
    if update_comps
      nume = coefficients.transpose.dot(x)
      deno = coefficients.transpose.dot(coefficients).dot(@components) + @params[:eps]
      @components *= (nume / deno)
    end
    # update the coefficients.
    nume = x.dot(@components.transpose)
    deno = coefficients.dot(@components).dot(@components.transpose) + @params[:eps]
    coefficients *= (nume / deno)
    # normalize: move the row norms of the components into the coefficients.
    # The two rescalings must happen together so that the product
    # coefficients.dot(@components) is preserved; when the components are
    # fixed, the coefficients must not be rescaled on their own.
    if update_comps
      norm = Numo::NMath.sqrt((@components**2).sum(1)) + @params[:eps]
      @components /= norm.expand_dims(1)
      coefficients *= norm
    end
    # check convergence with the mean squared reconstruction error per sample.
    err = ((x - coefficients.dot(@components))**2).sum(1).mean
    break if err < @params[:tol]
  end
  coefficients
end
|
140
|
+
|
141
|
+
def rand_uniform(shape)
|
142
|
+
rnd_vals = Array.new(shape.inject(:*)) { @rng.rand }
|
143
|
+
Numo::DFloat.asarray(rnd_vals).reshape(shape[0], shape[1])
|
144
|
+
end
|
145
|
+
end
|
146
|
+
end
|
147
|
+
end
|
@@ -0,0 +1,150 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'svmkit/validation'
|
4
|
+
require 'svmkit/base/base_estimator'
|
5
|
+
require 'svmkit/base/transformer'
|
6
|
+
|
7
|
+
module SVMKit
|
8
|
+
# Module for matrix decomposition algorithms.
|
9
|
+
module Decomposition
|
10
|
+
# PCA is a class that implements Principal Component Analysis.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# decomposer = SVMKit::Decomposition::PCA.new(n_components: 2)
|
14
|
+
# representation = decomposer.fit_transform(samples)
|
15
|
+
#
|
16
|
+
# *Reference*
|
17
|
+
# - A. Sharma and K. K. Paliwal, "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
|
18
|
+
class PCA
|
19
|
+
include Base::BaseEstimator
|
20
|
+
include Base::Transformer
|
21
|
+
include Validation
|
22
|
+
|
23
|
+
# Returns the principal components.
|
24
|
+
# @return [Numo::DFloat] (shape: [n_components, n_features])
|
25
|
+
attr_reader :components
|
26
|
+
|
27
|
+
# Returns the mean vector.
|
28
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
29
|
+
attr_reader :mean
|
30
|
+
|
31
|
+
# Returns the random generator.
|
32
|
+
# @return [Random]
|
33
|
+
attr_reader :rng
|
34
|
+
|
35
|
+
# Create a new transformer with PCA.
|
36
|
+
#
|
37
|
+
# @param n_components [Integer] The number of principal components.
|
38
|
+
# @param max_iter [Integer] The maximum number of iterations.
|
39
|
+
# @param tol [Float] The tolerance of termination criterion.
|
40
|
+
# @param random_seed [Integer] The seed value used to initialize the random generator.
|
41
|
+
def initialize(n_components: 2, max_iter: 100, tol: 1.0e-4, random_seed: nil)
|
42
|
+
check_params_integer(n_components: n_components, max_iter: max_iter)
|
43
|
+
check_params_float(tol: tol)
|
44
|
+
check_params_type_or_nil(Integer, random_seed: random_seed)
|
45
|
+
check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
|
46
|
+
@params = {}
|
47
|
+
@params[:n_components] = n_components
|
48
|
+
@params[:max_iter] = max_iter
|
49
|
+
@params[:tol] = tol
|
50
|
+
@params[:random_seed] = random_seed
|
51
|
+
@params[:random_seed] ||= srand
|
52
|
+
@components = nil
|
53
|
+
@mean = nil
|
54
|
+
@rng = Random.new(@params[:random_seed])
|
55
|
+
end
|
56
|
+
|
57
|
+
# Fit the model with given training data.
|
58
|
+
#
|
59
|
+
# @overload fit(x) -> PCA
|
60
|
+
#
|
61
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
62
|
+
# @return [PCA] The learned transformer itself.
|
63
|
+
def fit(x, _y = nil)
|
64
|
+
check_sample_array(x)
|
65
|
+
# initialize some variables.
|
66
|
+
@components = nil
|
67
|
+
n_samples, n_features = x.shape
|
68
|
+
# centering.
|
69
|
+
@mean = x.mean(0)
|
70
|
+
centered_x = x - @mean
|
71
|
+
# optimization.
|
72
|
+
covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
|
73
|
+
@params[:n_components].times do
|
74
|
+
comp_vec = random_vec(n_features)
|
75
|
+
@params[:max_iter].times do
|
76
|
+
updated = orthogonalize(covariance_mat.dot(comp_vec))
|
77
|
+
break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
|
78
|
+
comp_vec = updated
|
79
|
+
end
|
80
|
+
@components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
|
81
|
+
end
|
82
|
+
self
|
83
|
+
end
|
84
|
+
|
85
|
+
# Fit the model with training data, and then transform them with the learned model.
|
86
|
+
#
|
87
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
88
|
+
#
|
89
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
90
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
91
|
+
def fit_transform(x, _y = nil)
|
92
|
+
check_sample_array(x)
|
93
|
+
fit(x).transform(x)
|
94
|
+
end
|
95
|
+
|
96
|
+
# Transform the given data with the learned model.
|
97
|
+
#
|
98
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
99
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
100
|
+
def transform(x)
|
101
|
+
check_sample_array(x)
|
102
|
+
(x - @mean).dot(@components.transpose)
|
103
|
+
end
|
104
|
+
|
105
|
+
# Inverse transform the given transformed data with the learned model.
|
106
|
+
#
|
107
|
+
# @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
|
108
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
|
109
|
+
def inverse_transform(z)
|
110
|
+
check_sample_array(z)
|
111
|
+
c = @components.shape[1].nil? ? @components.expand_dims(0) : @components
|
112
|
+
z.dot(c) + @mean
|
113
|
+
end
|
114
|
+
|
115
|
+
# Dump marshal data.
|
116
|
+
# @return [Hash] The marshal data.
|
117
|
+
def marshal_dump
|
118
|
+
{ params: @params,
|
119
|
+
components: @components,
|
120
|
+
mean: @mean,
|
121
|
+
rng: @rng }
|
122
|
+
end
|
123
|
+
|
124
|
+
# Load marshal data.
|
125
|
+
# @return [nil]
|
126
|
+
def marshal_load(obj)
|
127
|
+
@params = obj[:params]
|
128
|
+
@components = obj[:components]
|
129
|
+
@mean = obj[:mean]
|
130
|
+
@rng = obj[:rng]
|
131
|
+
nil
|
132
|
+
end
|
133
|
+
|
134
|
+
private
|
135
|
+
|
136
|
+
# Remove from the given vector its projections onto the components found so
# far (if any), then scale the result to unit length.
#
# @param pcvec [Numo::DFloat] (shape: [n_features]) The candidate component vector.
# @return [Numo::DFloat] (shape: [n_features]) The orthogonalized, normalized vector.
def orthogonalize(pcvec)
  unless @components.nil?
    delta = @components.dot(pcvec) * @components.transpose
    # With two or more stored components delta is a matrix; sum over its second axis.
    delta = delta.sum(1) unless delta.shape[1].nil?
    pcvec -= delta
  end
  # The small constant belongs to the divisor: it guards against division by
  # zero for a (near-)zero vector. Without the parentheses it would instead be
  # added to every element of the already normalized vector.
  pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
end
|
144
|
+
|
145
|
+
def random_vec(n_features)
|
146
|
+
Numo::DFloat[*(Array.new(n_features) { @rng.rand })]
|
147
|
+
end
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
data/lib/svmkit/version.rb
CHANGED
data/svmkit.gemspec
CHANGED
@@ -18,7 +18,8 @@ SVMKit provides machine learning algorithms with interfaces similar to Scikit-Le
|
|
18
18
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
19
19
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
20
20
|
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
21
|
-
K-Means, DBSCAN
|
21
|
+
K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
|
22
|
+
and cross-validation.
|
22
23
|
MSG
|
23
24
|
spec.homepage = 'https://github.com/yoshoku/svmkit'
|
24
25
|
spec.license = 'BSD-2-Clause'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: svmkit
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-06-
|
11
|
+
date: 2018-06-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -86,7 +86,8 @@ description: |
|
|
86
86
|
SVMKit currently supports Linear / Kernel Support Vector Machine,
|
87
87
|
Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
|
88
88
|
Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
|
89
|
-
K-Means, DBSCAN
|
89
|
+
K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
|
90
|
+
and cross-validation.
|
90
91
|
email:
|
91
92
|
- yoshoku@outlook.com
|
92
93
|
executables: []
|
@@ -118,6 +119,8 @@ files:
|
|
118
119
|
- lib/svmkit/clustering/dbscan.rb
|
119
120
|
- lib/svmkit/clustering/k_means.rb
|
120
121
|
- lib/svmkit/dataset.rb
|
122
|
+
- lib/svmkit/decomposition/nmf.rb
|
123
|
+
- lib/svmkit/decomposition/pca.rb
|
121
124
|
- lib/svmkit/ensemble/random_forest_classifier.rb
|
122
125
|
- lib/svmkit/ensemble/random_forest_regressor.rb
|
123
126
|
- lib/svmkit/evaluation_measure/accuracy.rb
|