svmkit 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 917f85878296b940b497f13253e3d3b03047be8f154d554116c2629aaeea55dd
4
- data.tar.gz: 16308e4638b15a55843f15b4e0d97886f27aae0cc236c59c590a8f9fe7f0e5c6
3
+ metadata.gz: d8486463886064e5aa5169dbed20101a01d01101483226cf96e38b377144e153
4
+ data.tar.gz: 79807cdbe9f10fba17cd91dbcd68c713180fd986e9b6fab575a849c762bb0d2e
5
5
  SHA512:
6
- metadata.gz: d390d3ef0d7b06676e6d3c34479939b4a99ee01472816eacbe49fd3f40224ef5984620dfe6d335fb5b15e7213d3b0d17ba9441766e7cdd08c8bad9bff669db8d
7
- data.tar.gz: ab2239c0d1297e18e31940e763875ac24668d8c4c3f30355f06bc5ed305c247ff0328e1d584c5ab70ce77d4d2f946dcc5f72f1eb4c3a25d9b0dcd38e1d246182
6
+ metadata.gz: 2ec6c45c99bda82813644b7b4a043a4dda27810e24e10b4588182cbc3dea106eb05ac013354177861ab4abf552d89670ea1f0d39d50976a232feedc0daee9030
7
+ data.tar.gz: 373a566f294bab3d6fb232516f5720a877b6e20999c3253ae49044d759f83b06999fef4e259df91b950fe49c395042c1a74675fadb326460a7e65d54048ca325
data/HISTORY.md CHANGED
@@ -1,3 +1,7 @@
1
+ # 0.6.0
2
+ - Add class for Principal Component Analysis.
3
+ - Add class for Non-negative Matrix Factorization.
4
+
1
5
  # 0.5.2
2
6
  - Add class for DBSCAN clustering.
3
7
 
data/README.md CHANGED
@@ -10,7 +10,8 @@ SVMKit provides machine learning algorithms with interfaces similar to Scikit-Le
10
10
  SVMKit currently supports Linear / Kernel Support Vector Machine,
11
11
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
12
12
  Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor classifier,
13
- K-Means, DBSCAN and cross-validation.
13
+ K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
14
+ and cross-validation.
14
15
 
15
16
  ## Installation
16
17
 
@@ -39,6 +39,8 @@ require 'svmkit/ensemble/random_forest_classifier'
39
39
  require 'svmkit/ensemble/random_forest_regressor'
40
40
  require 'svmkit/clustering/k_means'
41
41
  require 'svmkit/clustering/dbscan'
42
+ require 'svmkit/decomposition/pca'
43
+ require 'svmkit/decomposition/nmf'
42
44
  require 'svmkit/preprocessing/l2_normalizer'
43
45
  require 'svmkit/preprocessing/min_max_scaler'
44
46
  require 'svmkit/preprocessing/standard_scaler'
@@ -0,0 +1,147 @@
1
# frozen_string_literal: true

require 'svmkit/validation'
require 'svmkit/base/base_estimator'
require 'svmkit/base/transformer'

module SVMKit
  module Decomposition
    # NMF is a class that implements Non-negative Matrix Factorization
    # using the multiplicative update algorithm.
    #
    # @example
    #   decomposer = SVMKit::Decomposition::NMF.new(n_components: 2)
    #   representation = decomposer.fit_transform(samples)
    #
    # *Reference*
    # - W. Xu, X. Liu, and Y.Gong, "Document Clustering Based On Non-negative Matrix Factorization," Proc. SIGIR' 03 , pp. 267--273, 2003.
    class NMF
      include Base::BaseEstimator
      include Base::Transformer
      include Validation

      # Returns the factorization matrix.
      # @return [Numo::DFloat] (shape: [n_components, n_features])
      attr_reader :components

      # Return the random generator.
      # @return [Random]
      attr_reader :rng

      # Create a new transformer with NMF.
      #
      # @param n_components [Integer] The number of components.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param tol [Float] The tolerance of termination criterion.
      # @param eps [Float] A small value close to zero to avoid zero division error.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_components: 2, max_iter: 500, tol: 1.0e-4, eps: 1.0e-16, random_seed: nil)
        check_params_integer(n_components: n_components, max_iter: max_iter)
        check_params_float(tol: tol, eps: eps)
        check_params_type_or_nil(Integer, random_seed: random_seed)
        check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol, eps: eps)
        @params = {
          n_components: n_components,
          max_iter: max_iter,
          tol: tol,
          eps: eps,
          random_seed: random_seed || srand
        }
        @components = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @overload fit(x) -> NMF
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [NMF] The learned transformer itself.
      def fit(x, _y = nil)
        check_sample_array(x)
        partial_fit(x)
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
      def fit_transform(x, _y = nil)
        check_sample_array(x)
        partial_fit(x)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
      def transform(x)
        check_sample_array(x)
        # Keep the learned components fixed and only optimize the coefficients.
        partial_fit(x, false)
      end

      # Inverse transform the given transformed data with the learned model.
      #
      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
      def inverse_transform(z)
        check_sample_array(z)
        z.dot(@components)
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        {
          params: @params,
          components: @components,
          rng: @rng
        }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @components = obj[:components]
        @rng = obj[:rng]
        nil
      end

      private

      # Run the multiplicative-update optimization.
      # When update_comps is true, both the components and the coefficients are
      # learned (fit); otherwise the components stay fixed (transform).
      def partial_fit(x, update_comps = true)
        n_samples, n_features = x.shape
        # Scale random initial factors so W * H roughly matches x's magnitude.
        scale = Math.sqrt(x.mean / @params[:n_components])
        @components = rand_uniform([@params[:n_components], n_features]) * scale if update_comps
        coefs = rand_uniform([n_samples, @params[:n_components]]) * scale
        @params[:max_iter].times do
          # Multiplicative update of the component matrix H.
          if update_comps
            numer = coefs.transpose.dot(x)
            denom = coefs.transpose.dot(coefs).dot(@components) + @params[:eps]
            @components *= numer / denom
          end
          # Multiplicative update of the coefficient matrix W.
          numer = x.dot(@components.transpose)
          denom = coefs.dot(@components).dot(@components.transpose) + @params[:eps]
          coefs *= numer / denom
          # Rescale so each component row keeps (near) unit L2 norm.
          norm = Numo::NMath.sqrt((@components**2).sum(1)) + @params[:eps]
          @components /= norm.expand_dims(1) if update_comps
          coefs *= norm
          # Terminate when the mean squared reconstruction error is small enough.
          err = ((x - coefs.dot(@components))**2).sum(1).mean
          break if err < @params[:tol]
        end
        coefs
      end

      # Draw a uniform-random matrix of the given [rows, cols] shape from @rng.
      def rand_uniform(shape)
        n_elements = shape.inject(:*)
        Numo::DFloat.asarray(Array.new(n_elements) { @rng.rand }).reshape(*shape)
      end
    end
  end
end
@@ -0,0 +1,150 @@
1
# frozen_string_literal: true

require 'svmkit/validation'
require 'svmkit/base/base_estimator'
require 'svmkit/base/transformer'

module SVMKit
  # Module for matrix decomposition algorithms.
  module Decomposition
    # PCA is a class that implements Principal Component Analysis.
    #
    # @example
    #   decomposer = SVMKit::Decomposition::PCA.new(n_components: 2)
    #   representation = decomposer.fit_transform(samples)
    #
    # *Reference*
    # - A. Sharma and K K. Paliwal, "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007.
    class PCA
      include Base::BaseEstimator
      include Base::Transformer
      include Validation

      # Returns the principal components.
      # @return [Numo::DFloat] (shape: [n_components, n_features])
      attr_reader :components

      # Returns the mean vector.
      # @return [Numo::DFloat] (shape: [n_features])
      attr_reader :mean

      # Return the random generator.
      # @return [Random]
      attr_reader :rng

      # Create a new transformer with PCA.
      #
      # @param n_components [Integer] The number of principal components.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param tol [Float] The tolerance of termination criterion.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_components: 2, max_iter: 100, tol: 1.0e-4, random_seed: nil)
        check_params_integer(n_components: n_components, max_iter: max_iter)
        check_params_float(tol: tol)
        check_params_type_or_nil(Integer, random_seed: random_seed)
        check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol)
        @params = {}
        @params[:n_components] = n_components
        @params[:max_iter] = max_iter
        @params[:tol] = tol
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @components = nil
        @mean = nil
        @rng = Random.new(@params[:random_seed])
      end

      # Fit the model with given training data.
      #
      # @overload fit(x) -> PCA
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [PCA] The learned transformer itself.
      def fit(x, _y = nil)
        check_sample_array(x)
        # initialize some variables.
        @components = nil
        n_samples, n_features = x.shape
        # centering.
        @mean = x.mean(0)
        centered_x = x - @mean
        # optimization: extract components one by one with the fixed-point
        # power iteration, deflating via Gram-Schmidt orthogonalization.
        covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1)
        @params[:n_components].times do
          comp_vec = random_vec(n_features)
          @params[:max_iter].times do
            updated = orthogonalize(covariance_mat.dot(comp_vec))
            # converged when the update no longer rotates the unit vector.
            break if (updated.dot(comp_vec) - 1).abs < @params[:tol]
            comp_vec = updated
          end
          @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec])
        end
        self
      end

      # Fit the model with training data, and then transform them with the learned model.
      #
      # @overload fit_transform(x) -> Numo::DFloat
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
      def fit_transform(x, _y = nil)
        check_sample_array(x)
        fit(x).transform(x)
      end

      # Transform the given data with the learned model.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
      def transform(x)
        check_sample_array(x)
        (x - @mean).dot(@components.transpose)
      end

      # Inverse transform the given transformed data with the learned model.
      #
      # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model.
      # @return [Numo::DFloat] (shape: [n_samples, n_features]) The restored data.
      def inverse_transform(z)
        check_sample_array(z)
        # @components is a 1-D vector when n_components == 1; lift it to 2-D.
        c = @components.shape[1].nil? ? @components.expand_dims(0) : @components
        z.dot(c) + @mean
      end

      # Dump marshal data.
      # @return [Hash] The marshal data.
      def marshal_dump
        { params: @params,
          components: @components,
          mean: @mean,
          rng: @rng }
      end

      # Load marshal data.
      # @return [nil]
      def marshal_load(obj)
        @params = obj[:params]
        @components = obj[:components]
        @mean = obj[:mean]
        @rng = obj[:rng]
        nil
      end

      private

      # Remove the projections onto the already-extracted components
      # (Gram-Schmidt deflation) and renormalize to a unit vector.
      def orthogonalize(pcvec)
        unless @components.nil?
          delta = @components.dot(pcvec) * @components.transpose
          delta = delta.sum(1) unless delta.shape[1].nil?
          pcvec -= delta
        end
        # FIX: the zero-division guard (1.0e-12) must be added to the norm in
        # the denominator. The previous expression
        #   pcvec / Math.sqrt((pcvec**2).sum.abs) + 1.0e-12
        # parsed as (pcvec / norm) + 1.0e-12 due to operator precedence,
        # adding 1.0e-12 to every element of the normalized vector instead.
        pcvec / (Math.sqrt((pcvec**2).sum.abs) + 1.0e-12)
      end

      # Draw a uniform-random n_features-dimensional vector from @rng.
      def random_vec(n_features)
        Numo::DFloat[*(Array.new(n_features) { @rng.rand })]
      end
    end
  end
end
@@ -3,5 +3,5 @@
3
3
  # SVMKit is a machine learning library in Ruby.
4
4
  module SVMKit
5
5
  # @!visibility private
6
- VERSION = '0.5.2'.freeze
6
+ VERSION = '0.6.0'.freeze
7
7
  end
@@ -18,7 +18,8 @@ SVMKit provides machine learning algorithms with interfaces similar to Scikit-Le
18
18
  SVMKit currently supports Linear / Kernel Support Vector Machine,
19
19
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
20
20
  Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
21
- K-Means, DBSCAN and cross-validation.
21
+ K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
22
+ and cross-validation.
22
23
  MSG
23
24
  spec.homepage = 'https://github.com/yoshoku/svmkit'
24
25
  spec.license = 'BSD-2-Clause'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: svmkit
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.2
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-06-23 00:00:00.000000000 Z
11
+ date: 2018-06-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -86,7 +86,8 @@ description: |
86
86
  SVMKit currently supports Linear / Kernel Support Vector Machine,
87
87
  Logistic Regression, Linear Regression, Ridge, Lasso, Factorization Machine,
88
88
  Naive Bayes, Decision Tree, Random Forest, K-nearest neighbor algorithm,
89
- K-Means, DBSCAN and cross-validation.
89
+ K-Means, DBSCAN, Principal Component Analysis, Non-negative Matrix Factorization
90
+ and cross-validation.
90
91
  email:
91
92
  - yoshoku@outlook.com
92
93
  executables: []
@@ -118,6 +119,8 @@ files:
118
119
  - lib/svmkit/clustering/dbscan.rb
119
120
  - lib/svmkit/clustering/k_means.rb
120
121
  - lib/svmkit/dataset.rb
122
+ - lib/svmkit/decomposition/nmf.rb
123
+ - lib/svmkit/decomposition/pca.rb
121
124
  - lib/svmkit/ensemble/random_forest_classifier.rb
122
125
  - lib/svmkit/ensemble/random_forest_regressor.rb
123
126
  - lib/svmkit/evaluation_measure/accuracy.rb