rumale 0.17.2 → 0.17.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/rumale.rb +1 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +2 -54
- data/lib/rumale/pipeline/feature_union.rb +69 -0
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -0
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68b01301c9e7b9aa9d98f0a252e87fc0c5e5e888379e347660a76a63ae7f86a4
|
4
|
+
data.tar.gz: dd464110ff410705a20fa2446c884ed039b9f0ea3febfc4cd12ef8d6b542b736
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d95cf5140e5846431b812867a64927f51ede6f6a925ffdc3a43f3ae1add1ce33756ad6481ca7645edeed56084fe0f8d294f5032364ff664cae19e9edcdaac57e
|
7
|
+
data.tar.gz: 620bd2099ddd5ce14ff9525062938740835fe9714f1c42023f7782e963a96b423bba4921e35d134877aada0d05189d5213b90e5556b50799dadfebdeeaf59eba
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# 0.17.3
|
2
|
+
- Add pipeline class for [FeatureUnion](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/FeatureUnion.html).
|
3
|
+
- Fix to use mmh3 gem for generating hash value on [FeatureHasher](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction/FeatureHasher.html).
|
4
|
+
|
1
5
|
# 0.17.2
|
2
6
|
- Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
|
3
7
|
- Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class considering that array of hash is given to HashVectorizer.
|
data/LICENSE.txt
CHANGED
data/lib/rumale.rb
CHANGED
@@ -25,6 +25,7 @@ require 'rumale/optimizer/adam'
|
|
25
25
|
require 'rumale/optimizer/nadam'
|
26
26
|
require 'rumale/optimizer/yellow_fin'
|
27
27
|
require 'rumale/pipeline/pipeline'
|
28
|
+
require 'rumale/pipeline/feature_union'
|
28
29
|
require 'rumale/kernel_approximation/rbf'
|
29
30
|
require 'rumale/kernel_approximation/nystroem'
|
30
31
|
require 'rumale/linear_model/base_linear_model'
|
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'mmh3'
|
3
4
|
require 'rumale/base/base_estimator'
|
4
5
|
require 'rumale/base/transformer'
|
5
6
|
|
@@ -72,7 +73,7 @@ module Rumale
|
|
72
73
|
val = v.is_a?(String) ? 1 : v
|
73
74
|
next if val.zero?
|
74
75
|
|
75
|
-
h =
|
76
|
+
h = Mmh3.hash32(k)
|
76
77
|
fid = h.abs % n_features
|
77
78
|
val *= h >= 0 ? 1 : -1 if alternate_sign?
|
78
79
|
z[i, fid] = val
|
@@ -91,59 +92,6 @@ module Rumale
|
|
91
92
|
def alternate_sign?
|
92
93
|
@params[:alternate_sign]
|
93
94
|
end
|
94
|
-
|
95
|
-
# MurmurHash3_32
|
96
|
-
# References:
|
97
|
-
# - https://en.wikipedia.org/wiki/MurmurHash
|
98
|
-
# - https://github.com/aappleby/smhasher
|
99
|
-
def murmur_hash(key_str, seed = 0)
|
100
|
-
keyb = key_str.bytes
|
101
|
-
key_len = keyb.size
|
102
|
-
n_blocks = key_len / 4
|
103
|
-
|
104
|
-
h = seed
|
105
|
-
(0...n_blocks * 4).step(4) do |bstart|
|
106
|
-
k = keyb[bstart + 3] << 24 | keyb[bstart + 2] << 16 | keyb[bstart + 1] << 8 | keyb[bstart + 0]
|
107
|
-
h ^= murmur_scramble(k)
|
108
|
-
h = murmur_rotl(h, 13)
|
109
|
-
h = (h * 5 + 0xe6546b64) & 0xFFFFFFFF
|
110
|
-
end
|
111
|
-
|
112
|
-
tail_id = n_blocks * 4
|
113
|
-
tail_sz = key_len & 3
|
114
|
-
|
115
|
-
k = 0
|
116
|
-
k ^= keyb[tail_id + 2] << 16 if tail_sz >= 3
|
117
|
-
k ^= keyb[tail_id + 1] << 8 if tail_sz >= 2
|
118
|
-
k ^= keyb[tail_id + 0] if tail_sz >= 1
|
119
|
-
h ^= murmur_scramble(k) if tail_sz.positive?
|
120
|
-
|
121
|
-
h = murmur_fmix(h ^ key_len)
|
122
|
-
|
123
|
-
if (h & 0x80000000).zero?
|
124
|
-
h
|
125
|
-
else
|
126
|
-
-((h ^ 0xFFFFFFFF) + 1)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
def murmur_rotl(x, r)
|
131
|
-
(x << r | x >> (32 - r)) & 0xFFFFFFFF
|
132
|
-
end
|
133
|
-
|
134
|
-
def murmur_scramble(k)
|
135
|
-
k = (k * 0xcc9e2d51) & 0xFFFFFFFF
|
136
|
-
k = murmur_rotl(k, 15)
|
137
|
-
(k * 0x1b873593) & 0xFFFFFFFF
|
138
|
-
end
|
139
|
-
|
140
|
-
def murmur_fmix(h)
|
141
|
-
h ^= h >> 16
|
142
|
-
h = (h * 0x85ebca6b) & 0xFFFFFFFF
|
143
|
-
h ^= h >> 13
|
144
|
-
h = (h * 0xc2b2ae35) & 0xFFFFFFFF
|
145
|
-
h ^ (h >> 16)
|
146
|
-
end
|
147
95
|
end
|
148
96
|
end
|
149
97
|
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/validation'
|
4
|
+
require 'rumale/base/base_estimator'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Pipeline
|
8
|
+
# FeatureUnion is a class that concatenates the results of multiple transformers.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# fu = Rumale::Pipeline::FeatureUnion.new(
|
12
|
+
# transformers: {
|
13
|
+
# 'rbf': Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 96, random_seed: 1),
|
14
|
+
# 'pca': Rumale::Decomposition::PCA.new(n_components: 32)
|
15
|
+
# }
|
16
|
+
# )
|
17
|
+
# fu.fit(training_samples, training_labels)
|
18
|
+
# results = fu.transform(testing_samples)
|
19
|
+
#
|
20
|
+
# # > p results.shape[1]
|
21
|
+
# # > 128
|
22
|
+
#
|
23
|
+
class FeatureUnion
|
24
|
+
include Base::BaseEstimator
|
25
|
+
include Validation
|
26
|
+
|
27
|
+
# Return the transformers
|
28
|
+
# @return [Hash]
|
29
|
+
attr_reader :transformers
|
30
|
+
|
31
|
+
# Create a new feature union.
|
32
|
+
#
|
33
|
+
# @param transformers [Hash] List of transformers. The order of transforms follows the insertion order of hash keys.
|
34
|
+
def initialize(transformers:)
|
35
|
+
check_params_type(Hash, transformers: transformers)
|
36
|
+
@params = {}
|
37
|
+
@transformers = transformers
|
38
|
+
end
|
39
|
+
|
40
|
+
# Fit the model with given training data.
|
41
|
+
#
|
42
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
|
43
|
+
# @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
|
44
|
+
# @return [FeatureUnion] The learned feature union itself.
|
45
|
+
def fit(x, y = nil)
|
46
|
+
@transformers.each { |_k, t| t.fit(x, y) }
|
47
|
+
self
|
48
|
+
end
|
49
|
+
|
50
|
+
# Fit the model with training data, and then transform them with the learned model.
|
51
|
+
#
|
52
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
|
53
|
+
# @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
|
54
|
+
# @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
|
55
|
+
def fit_transform(x, y = nil)
|
56
|
+
fit(x, y).transform(x)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Transform the given data with the learned model.
|
60
|
+
#
|
61
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned transformers.
|
62
|
+
# @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
|
63
|
+
def transform(x)
|
64
|
+
z = @transformers.values.map { |t| t.transform(x) }
|
65
|
+
Numo::NArray.hstack(z)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -45,6 +45,7 @@ Gem::Specification.new do |spec|
|
|
45
45
|
spec.required_ruby_version = '>= 2.3'
|
46
46
|
|
47
47
|
spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
|
48
|
+
spec.add_runtime_dependency 'mmh3', '~> 0.1'
|
48
49
|
|
49
50
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
50
51
|
spec.add_development_dependency 'coveralls', '~> 0.8'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.2
|
4
|
+
version: 0.17.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-
|
11
|
+
date: 2020-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mmh3
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -247,6 +261,7 @@ files:
|
|
247
261
|
- lib/rumale/optimizer/sgd.rb
|
248
262
|
- lib/rumale/optimizer/yellow_fin.rb
|
249
263
|
- lib/rumale/pairwise_metric.rb
|
264
|
+
- lib/rumale/pipeline/feature_union.rb
|
250
265
|
- lib/rumale/pipeline/pipeline.rb
|
251
266
|
- lib/rumale/polynomial_model/base_factorization_machine.rb
|
252
267
|
- lib/rumale/polynomial_model/factorization_machine_classifier.rb
|