rumale-manifold 0.28.0 → 0.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE.txt +1 -1
- data/lib/rumale/manifold/hessian_eigenmaps.rb +151 -0
- data/lib/rumale/manifold/local_tangent_space_alignment.rb +142 -0
- data/lib/rumale/manifold/locally_linear_embedding.rb +1 -1
- data/lib/rumale/manifold/version.rb +1 -1
- data/lib/rumale/manifold.rb +2 -0
- metadata +11 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8d9fa70278c2f220eca36209fa6a29b33733826e1bacd26bfc96b6f84f2a93ff
|
4
|
+
data.tar.gz: e21890d3e97490958e5c43793d70de271c43f43386b91ecbe50703160a6305ce
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23d964b84ff080aada02bcab0a5aec0707023cc28318e6ea4eb49ecf656cb5cf2c5261c1a7493e1e4624039a347e3bb4f343ef82e0d8c88f73385fe9f6616980
|
7
|
+
data.tar.gz: c2c32e786fd0b57ad0b7a2ff9987b551bfc61c90bfc0dfbfb39450350fbe075815e6fc9179a0ad1dfcd8f9142af39b4f829b5f04586bf2c5b109600d25710c13
|
data/LICENSE.txt
CHANGED
@@ -0,0 +1,151 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module Manifold
|
10
|
+
# HessianEigenmaps is a class that implements Hessian Eigenmaps.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'numo/linalg/autoloader'
|
14
|
+
# require 'rumale/manifold/hessian_eigenmaps'
|
15
|
+
#
|
16
|
+
# hem = Rumale::Manifold::HessianEigenmaps.new(n_components: 2, n_neighbors: 15)
|
17
|
+
# z = hem.fit_transform(x)
|
18
|
+
#
|
19
|
+
# *Reference*
|
20
|
+
# - Donoho, D. L., and Grimes, C., "Hessian eigenmaps: Locally linear embedding techniques for high-dimensional data," Proc. Natl. Acad. Sci. USA, vol. 100, no. 10, pp. 5591--5596, 2003.
|
21
|
+
class HessianEigenmaps < Rumale::Base::Estimator
|
22
|
+
include Rumale::Base::Transformer
|
23
|
+
|
24
|
+
# Return the data in representation space.
|
25
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components])
|
26
|
+
attr_reader :embedding
|
27
|
+
|
28
|
+
# Create a new transformer with Hessian Eigenmaps.
|
29
|
+
#
|
30
|
+
# @param n_components [Integer] The number of dimensions on representation space.
|
31
|
+
# @param n_neighbors [Integer] The number of nearest neighbors for k-nearest neighbor graph construction.
|
32
|
+
# @param reg_param [Float] The regularization parameter for local gram matrix in transform method.
|
33
|
+
def initialize(n_neighbors: 5, n_components: 2, reg_param: 1e-6)
  super()
  # All hyperparameters live in one hash, filled in the same key order as the keyword list.
  @params = {}
  @params[:n_neighbors] = n_neighbors
  @params[:n_components] = n_components
  @params[:reg_param] = reg_param
end
|
41
|
+
|
42
|
+
# Fit the model with given training data.
|
43
|
+
#
|
44
|
+
# @overload fit(x) -> HessianEigenmaps
|
45
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
46
|
+
# @return [HessianEigenmaps] The learned transformer itself.
|
47
|
+
def fit(x, _y = nil) # rubocop:disable Metrics/AbcSize
  raise 'HessianEigenmaps#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

  x = Rumale::Validation.check_convert_sample_array(x)

  n_samples = x.shape[0]
  n_components = @params[:n_components]
  n_neighbors = @params[:n_neighbors]
  # Number of distinct second-order products of the tangent coordinates (upper triangle incl. diagonal).
  tri_n_components = n_components * (n_components + 1) / 2

  # The local design matrix [1, tangent coordinates, products] has 1 + n_components + tri_n_components
  # columns. With fewer neighbors (rows) than columns, the QR step below cannot provide the
  # tri_n_components columns read from it, so reject the configuration up front.
  if n_neighbors <= n_components + tri_n_components
    raise ArgumentError,
          'HessianEigenmaps#fit requires n_neighbors to be greater than n_components * (n_components + 3) / 2'
  end

  distance_mat = Rumale::PairwiseMetric.squared_error(x)
  knn_ids = neighbor_ids(distance_mat, n_neighbors, true)

  hessian_mat = Numo::DFloat.zeros(n_samples * tri_n_components, n_samples)
  ones = Numo::DFloat.ones(n_neighbors, 1)
  n_samples.times do |i|
    ids = knn_ids[i, true]
    tan_coords = tangent_coordinates(x[ids, true])

    # Pack the products t_m * t_l (m <= l) into consecutive columns, one upper-triangle row after
    # another. A running column cursor guarantees that no column is written twice or left empty.
    xi = Numo::DFloat.zeros(n_neighbors, tri_n_components)
    col = 0
    n_components.times do |m|
      (m...n_components).each do |l|
        xi[true, col] = tan_coords[true, m] * tan_coords[true, l]
        col += 1
      end
    end

    # Orthonormalize [1, tangent coordinates, products] and drop the columns that belong to the
    # constant and linear terms; the following columns are used for the quadratic part.
    xt, = Numo::Linalg.qr(Numo::DFloat.hstack([ones, tan_coords, xi]))
    pii = xt[true, (n_components + 1)..-1]
    tri_n_components.times do |j|
      pj_sum = pii[true, j].sum
      # Rescale the column to unit sum. The test is on the magnitude so that columns with a negative
      # sum are rescaled as well; only (near-)zero sums are left untouched to avoid dividing by zero.
      normalizer = pj_sum.abs <= 1e-8 ? 1 : 1.fdiv(pj_sum)
      hessian_mat[i * tri_n_components + j, ids] = pii[true, j] * normalizer
    end
  end

  kernel_mat = hessian_mat.transpose.dot(hessian_mat)
  # Index 0 of the spectrum is skipped; the eigenvectors with indices 1..n_components are kept.
  _, eig_vecs = Numo::Linalg.eigh(kernel_mat, vals_range: 1...(1 + n_components))

  # A one-dimensional embedding is stored as a vector instead of a single-column matrix.
  @embedding = n_components == 1 ? eig_vecs[true, 0].dup : eig_vecs.dup
  @x_train = x.dup

  self
end
|
86
|
+
|
87
|
+
# Fit the model with training data, and then transform them with the learned model.
|
88
|
+
#
|
89
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
90
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
91
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
92
|
+
def fit_transform(x, _y = nil)
  # Report the missing backend under this method's own name before delegating to fit.
  message = 'HessianEigenmaps#fit_transform requires Numo::Linalg but that is not loaded'
  raise message unless enable_linalg?(warning: false)

  fit(x)

  # Return a copy so that callers cannot modify the embedding stored on the estimator.
  @embedding.dup
end
|
101
|
+
|
102
|
+
# Transform the given data with the learned model.
|
103
|
+
# For out-of-sample data embedding, the same method as Locally Linear Embedding is used.
|
104
|
+
#
|
105
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
106
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
107
|
+
def transform(x)
  x = Rumale::Validation.check_convert_sample_array(x)

  n_samples = x.shape[0]
  n_neighbors = @params[:n_neighbors]
  tol = @params[:reg_param].fdiv(n_neighbors)
  distance_mat = Rumale::PairwiseMetric.squared_error(x, @x_train)
  knn_ids = neighbor_ids(distance_mat, n_neighbors, false)
  weight_mat = Numo::DFloat.zeros(n_samples, @x_train.shape[0])
  eye = Numo::DFloat.eye(n_neighbors)

  n_samples.times do |n|
    ids = knn_ids[n, true]
    # Neighbors expressed relative to the query point, and their local Gram matrix.
    x_local = @x_train[ids, true] - x[n, true]
    gram_mat = x_local.dot(x_local.transpose)
    # Ridge term scaled by the trace of the local Gram matrix itself. Scaling by the trace of the
    # output weight matrix would give no regularization at all for the first sample (it is still
    # all zeros), leaving a rank-deficient system to solve. Fall back to the bare tolerance when
    # the trace is not positive (all neighbors coincide with the query point).
    gram_trace = gram_mat.trace
    ridge = gram_trace > 0 ? tol * gram_trace : tol
    gram_mat += ridge * eye
    weights = Numo::Linalg.solve(gram_mat, Numo::DFloat.ones(n_neighbors))
    # Normalize the reconstruction weights to sum to (approximately) one.
    weights /= weights.sum + 1e-8
    weight_mat[n, ids] = weights
  end

  # Each new point is embedded as the weighted combination of its neighbors' embeddings.
  weight_mat.dot(@embedding)
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
def neighbor_ids(distance_mat, n_neighbors, contain_self)
  n_rows = distance_mat.shape[0]
  ids = Numo::Int32.zeros(n_rows, n_neighbors)
  n_rows.times do |row|
    # Column indices of this row ordered by increasing distance.
    ranked = distance_mat[row, true].sort_index.to_a
    # When the matrix holds distances of the samples to themselves, drop the sample's own index.
    ranked -= [row] if contain_self
    ids[row, true] = ranked[0...n_neighbors]
  end
  ids
end
|
140
|
+
|
141
|
+
def tangent_coordinates(x)
  n_features = x.shape[1]
  # Center the neighborhood on its mean and form its (unnormalized) feature scatter matrix.
  centered = x - x.mean(axis: 0)
  scatter_mat = centered.transpose.dot(centered)
  # Ask only for the last n_components eigenpairs of the spectrum.
  selected = (n_features - @params[:n_components])...n_features
  _, evecs = Numo::Linalg.eigh(scatter_mat, vals_range: selected)
  # Reverse the column order of the eigenvectors, then project the centered points onto them.
  centered.dot(evecs.reverse(1))
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
end
|
@@ -0,0 +1,142 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/pairwise_metric'
|
6
|
+
require 'rumale/validation'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module Manifold
|
10
|
+
# LocalTangentSpaceAlignment is a class that implements Local Tangent Space Alignment.
|
11
|
+
#
|
12
|
+
# @example
|
13
|
+
# require 'numo/linalg/autoloader'
|
14
|
+
# require 'rumale/manifold/local_tangent_space_alignment'
|
15
|
+
#
|
16
|
+
# lem = Rumale::Manifold::LocalTangentSpaceAlignment.new(n_components: 2, n_neighbors: 15)
|
17
|
+
# z = lem.fit_transform(x)
|
18
|
+
#
|
19
|
+
# *Reference*
|
20
|
+
# - Zhang, Z., and Zha, H., "Principal Manifolds and Nonlinear Dimension Reduction via Local Tangent Space Alignment," SIAM Journal on Scientific Computing, vol. 26, iss. 1, pp. 313-338, 2004.
|
21
|
+
class LocalTangentSpaceAlignment < Rumale::Base::Estimator
|
22
|
+
include Rumale::Base::Transformer
|
23
|
+
|
24
|
+
# Return the data in representation space.
|
25
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components])
|
26
|
+
attr_reader :embedding
|
27
|
+
|
28
|
+
# Create a new transformer with Local Tangent Space Alignment.
|
29
|
+
#
|
30
|
+
# @param n_components [Integer] The number of dimensions on representation space.
|
31
|
+
# @param n_neighbors [Integer] The number of nearest neighbors for finding k-nearest neighbors
|
32
|
+
# @param reg_param [Float] The regularization parameter for local gram matrix in transform method.
|
33
|
+
def initialize(n_components: 2, n_neighbors: 10, reg_param: 1e-3)
  super()
  # Requests for fewer than one neighbor are raised to 1.
  clamped_neighbors = [1, n_neighbors].max
  @params = { n_components: n_components, n_neighbors: clamped_neighbors, reg_param: reg_param }
end
|
41
|
+
|
42
|
+
# Fit the model with given training data.
|
43
|
+
#
|
44
|
+
# @overload fit(x) -> LocalTangentSpaceAlignment
|
45
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
46
|
+
# @return [LocalTangentSpaceAlignment] The learned transformer itself.
|
47
|
+
def fit(x, _y = nil)
  linalg_loaded = enable_linalg?(warning: false)
  raise 'LocalTangentSpaceAlignment#fit requires Numo::Linalg but that is not loaded' unless linalg_loaded

  x = Rumale::Validation.check_convert_sample_array(x)

  n_components = @params[:n_components]
  n_neighbors = @params[:n_neighbors]
  n_samples = x.shape[0]
  knn_ids = neighbor_ids(Rumale::PairwiseMetric.squared_error(x), n_neighbors, true)

  affinity_mat = Numo::DFloat.zeros(n_samples, n_samples)
  # Column 0 of the local basis is the constant vector with unit norm; the remaining columns are
  # overwritten for every sample inside the loop.
  x_tangent = Numo::DFloat.zeros(n_neighbors, n_components + 1)
  x_tangent[true, 0] = 1.fdiv(Math.sqrt(n_neighbors))

  n_samples.times do |n|
    ids = knn_ids[n, true]
    x_tangent[true, 1...] = right_singular_vectors(x[ids, true], n_components)
    weight_mat = x_tangent.dot(x_tangent.transpose)
    # Accumulate (I - weight_mat) into the rows and columns that belong to this neighborhood.
    ids.each_with_index do |m, i|
      affinity_mat[m, ids] -= weight_mat[i, true]
      affinity_mat[m, m] += 1
    end
  end

  # Symmetrize before the eigendecomposition.
  kernel_mat = 0.5 * (affinity_mat.transpose + affinity_mat)
  # Index 0 of the spectrum is skipped; the eigenvectors with indices 1..n_components are kept.
  _, eig_vecs = Numo::Linalg.eigh(kernel_mat, vals_range: 1...(1 + n_components))

  # A one-dimensional embedding is stored as a vector instead of a single-column matrix.
  @embedding = n_components == 1 ? eig_vecs[true, 0].dup : eig_vecs.dup
  @x_train = x.dup

  self
end
|
80
|
+
|
81
|
+
# Fit the model with training data, and then transform them with the learned model.
|
82
|
+
#
|
83
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
84
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
|
85
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data
|
86
|
+
def fit_transform(x, _y = nil)
  # Report the missing backend under this method's own name before delegating to fit.
  message = 'LocalTangentSpaceAlignment#fit_transform requires Numo::Linalg but that is not loaded'
  raise message unless enable_linalg?(warning: false)

  # fit returns the estimator itself, which is then used to map the same training data.
  fitted = fit(x)
  fitted.transform(x)
end
|
93
|
+
|
94
|
+
# Transform the given data with the learned model.
|
95
|
+
# For out-of-sample data embedding, the same method as Locally Linear Embedding is used.
|
96
|
+
#
|
97
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model.
|
98
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data.
|
99
|
+
def transform(x)
  x = Rumale::Validation.check_convert_sample_array(x)

  n_samples = x.shape[0]
  n_neighbors = @params[:n_neighbors]
  tol = @params[:reg_param].fdiv(n_neighbors)
  distance_mat = Rumale::PairwiseMetric.squared_error(x, @x_train)
  knn_ids = neighbor_ids(distance_mat, n_neighbors, false)
  weight_mat = Numo::DFloat.zeros(n_samples, @x_train.shape[0])
  eye = Numo::DFloat.eye(n_neighbors)

  n_samples.times do |n|
    ids = knn_ids[n, true]
    # Neighbors expressed relative to the query point, and their local Gram matrix.
    x_local = @x_train[ids, true] - x[n, true]
    gram_mat = x_local.dot(x_local.transpose)
    # Ridge term scaled by the trace of the local Gram matrix itself. Scaling by the trace of the
    # output weight matrix would give no regularization at all for the first sample (it is still
    # all zeros); when a query point equals a training point, as in fit_transform, the Gram matrix
    # then has an all-zero row and the system is singular. Fall back to the bare tolerance when
    # the trace is not positive.
    gram_trace = gram_mat.trace
    ridge = gram_trace > 0 ? tol * gram_trace : tol
    gram_mat += ridge * eye
    weights = Numo::Linalg.solve(gram_mat, Numo::DFloat.ones(n_neighbors))
    # Normalize the reconstruction weights to sum to (approximately) one.
    weights /= weights.sum + 1e-8
    weight_mat[n, ids] = weights
  end

  # Each new point is embedded as the weighted combination of its neighbors' embeddings.
  weight_mat.dot(@embedding)
end
|
119
|
+
|
120
|
+
private
|
121
|
+
|
122
|
+
def neighbor_ids(distance_mat, n_neighbors, contain_self)
  n_rows = distance_mat.shape[0]
  ids = Numo::Int32.zeros(n_rows, n_neighbors)
  n_rows.times do |row|
    # Column indices of this row ordered by increasing distance.
    ranked = distance_mat[row, true].sort_index.to_a
    # When the matrix holds distances of the samples to themselves, drop the sample's own index.
    ranked -= [row] if contain_self
    ids[row, true] = ranked[0...n_neighbors]
  end
  ids
end
|
132
|
+
|
133
|
+
def right_singular_vectors(x_local, n_singulars)
  n_samples = x_local.shape[0]
  # Center on the neighborhood mean; this builds a new array, so the caller's data is untouched.
  centered = x_local - x_local.mean(0)
  # Sample-by-sample Gram matrix of the centered neighborhood.
  gram_mat = centered.dot(centered.transpose)
  # Ask only for the last n_singulars eigenpairs of the spectrum.
  selected = (n_samples - n_singulars)...n_samples
  _, evecs = Numo::Linalg.eigh(gram_mat, vals_range: selected)
  # Reverse the column order and return an independent copy.
  evecs.reverse(1).dup
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
@@ -7,7 +7,7 @@ require 'rumale/validation'
|
|
7
7
|
|
8
8
|
module Rumale
|
9
9
|
module Manifold
|
10
|
-
# LocallyLinearEmbedding is a class that implements
|
10
|
+
# LocallyLinearEmbedding is a class that implements Locally Linear Embedding.
|
11
11
|
#
|
12
12
|
# @example
|
13
13
|
# require 'numo/linalg/autoloader'
|
data/lib/rumale/manifold.rb
CHANGED
@@ -4,6 +4,8 @@ require 'numo/narray'
|
|
4
4
|
|
5
5
|
require_relative 'manifold/laplacian_eigenmaps'
|
6
6
|
require_relative 'manifold/locally_linear_embedding'
|
7
|
+
require_relative 'manifold/hessian_eigenmaps'
|
8
|
+
require_relative 'manifold/local_tangent_space_alignment'
|
7
9
|
require_relative 'manifold/mds'
|
8
10
|
require_relative 'manifold/tsne'
|
9
11
|
require_relative 'manifold/version'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale-manifold
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.29.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -30,31 +30,32 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 0.
|
33
|
+
version: 0.29.0
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 0.
|
40
|
+
version: 0.29.0
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rumale-decomposition
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0.
|
47
|
+
version: 0.29.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0.
|
54
|
+
version: 0.29.0
|
55
55
|
description: |
|
56
56
|
Rumale::Manifold provides data embedding algorithms,
|
57
|
-
such as Multi-dimensional Scaling
|
57
|
+
such as Multi-dimensional Scaling, Locally Linear Embedding, Laplacian Eigenmaps, Hessian Eigenmaps,
|
58
|
+
and t-distributed Stochastic Neighbor Embedding,
|
58
59
|
with Rumale interface.
|
59
60
|
email:
|
60
61
|
- yoshoku@outlook.com
|
@@ -65,7 +66,9 @@ files:
|
|
65
66
|
- LICENSE.txt
|
66
67
|
- README.md
|
67
68
|
- lib/rumale/manifold.rb
|
69
|
+
- lib/rumale/manifold/hessian_eigenmaps.rb
|
68
70
|
- lib/rumale/manifold/laplacian_eigenmaps.rb
|
71
|
+
- lib/rumale/manifold/local_tangent_space_alignment.rb
|
69
72
|
- lib/rumale/manifold/locally_linear_embedding.rb
|
70
73
|
- lib/rumale/manifold/mds.rb
|
71
74
|
- lib/rumale/manifold/tsne.rb
|
@@ -93,7 +96,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
96
|
- !ruby/object:Gem::Version
|
94
97
|
version: '0'
|
95
98
|
requirements: []
|
96
|
-
rubygems_version: 3.
|
99
|
+
rubygems_version: 3.5.7
|
97
100
|
signing_key:
|
98
101
|
specification_version: 4
|
99
102
|
summary: Rumale::Manifold provides data embedding algorithms with Rumale interface.
|