rumale 0.17.2 → 0.17.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/LICENSE.txt +1 -1
- data/lib/rumale.rb +1 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +2 -54
- data/lib/rumale/pipeline/feature_union.rb +69 -0
- data/lib/rumale/version.rb +1 -1
- data/rumale.gemspec +1 -0
- metadata +17 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68b01301c9e7b9aa9d98f0a252e87fc0c5e5e888379e347660a76a63ae7f86a4
|
4
|
+
data.tar.gz: dd464110ff410705a20fa2446c884ed039b9f0ea3febfc4cd12ef8d6b542b736
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d95cf5140e5846431b812867a64927f51ede6f6a925ffdc3a43f3ae1add1ce33756ad6481ca7645edeed56084fe0f8d294f5032364ff664cae19e9edcdaac57e
|
7
|
+
data.tar.gz: 620bd2099ddd5ce14ff9525062938740835fe9714f1c42023f7782e963a96b423bba4921e35d134877aada0d05189d5213b90e5556b50799dadfebdeeaf59eba
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,7 @@
|
|
1
|
+
# 0.17.3
|
2
|
+
- Add pipeline class for [FeatureUnion](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/FeatureUnion.html).
|
3
|
+
- Fix to use mmh3 gem for generating hash value on [FeatureHasher](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction/FeatureHasher.html).
|
4
|
+
|
1
5
|
# 0.17.2
|
2
6
|
- Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
|
3
7
|
- Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class considering that array of hash is given to HashVectorizer.
|
data/LICENSE.txt
CHANGED
data/lib/rumale.rb
CHANGED
@@ -25,6 +25,7 @@ require 'rumale/optimizer/adam'
|
|
25
25
|
require 'rumale/optimizer/nadam'
|
26
26
|
require 'rumale/optimizer/yellow_fin'
|
27
27
|
require 'rumale/pipeline/pipeline'
|
28
|
+
require 'rumale/pipeline/feature_union'
|
28
29
|
require 'rumale/kernel_approximation/rbf'
|
29
30
|
require 'rumale/kernel_approximation/nystroem'
|
30
31
|
require 'rumale/linear_model/base_linear_model'
|
data/lib/rumale/feature_extraction/feature_hasher.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require 'mmh3'
|
3
4
|
require 'rumale/base/base_estimator'
|
4
5
|
require 'rumale/base/transformer'
|
5
6
|
|
@@ -72,7 +73,7 @@ module Rumale
|
|
72
73
|
val = v.is_a?(String) ? 1 : v
|
73
74
|
next if val.zero?
|
74
75
|
|
75
|
-
h =
|
76
|
+
h = Mmh3.hash32(k)
|
76
77
|
fid = h.abs % n_features
|
77
78
|
val *= h >= 0 ? 1 : -1 if alternate_sign?
|
78
79
|
z[i, fid] = val
|
@@ -91,59 +92,6 @@ module Rumale
|
|
91
92
|
def alternate_sign?
|
92
93
|
@params[:alternate_sign]
|
93
94
|
end
|
94
|
-
|
95
|
-
# MurmurHash3_32
|
96
|
-
# References:
|
97
|
-
# - https://en.wikipedia.org/wiki/MurmurHash
|
98
|
-
# - https://github.com/aappleby/smhasher
|
99
|
-
def murmur_hash(key_str, seed = 0)
|
100
|
-
keyb = key_str.bytes
|
101
|
-
key_len = keyb.size
|
102
|
-
n_blocks = key_len / 4
|
103
|
-
|
104
|
-
h = seed
|
105
|
-
(0...n_blocks * 4).step(4) do |bstart|
|
106
|
-
k = keyb[bstart + 3] << 24 | keyb[bstart + 2] << 16 | keyb[bstart + 1] << 8 | keyb[bstart + 0]
|
107
|
-
h ^= murmur_scramble(k)
|
108
|
-
h = murmur_rotl(h, 13)
|
109
|
-
h = (h * 5 + 0xe6546b64) & 0xFFFFFFFF
|
110
|
-
end
|
111
|
-
|
112
|
-
tail_id = n_blocks * 4
|
113
|
-
tail_sz = key_len & 3
|
114
|
-
|
115
|
-
k = 0
|
116
|
-
k ^= keyb[tail_id + 2] << 16 if tail_sz >= 3
|
117
|
-
k ^= keyb[tail_id + 1] << 8 if tail_sz >= 2
|
118
|
-
k ^= keyb[tail_id + 0] if tail_sz >= 1
|
119
|
-
h ^= murmur_scramble(k) if tail_sz.positive?
|
120
|
-
|
121
|
-
h = murmur_fmix(h ^ key_len)
|
122
|
-
|
123
|
-
if (h & 0x80000000).zero?
|
124
|
-
h
|
125
|
-
else
|
126
|
-
-((h ^ 0xFFFFFFFF) + 1)
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
def murmur_rotl(x, r)
|
131
|
-
(x << r | x >> (32 - r)) & 0xFFFFFFFF
|
132
|
-
end
|
133
|
-
|
134
|
-
def murmur_scramble(k)
|
135
|
-
k = (k * 0xcc9e2d51) & 0xFFFFFFFF
|
136
|
-
k = murmur_rotl(k, 15)
|
137
|
-
(k * 0x1b873593) & 0xFFFFFFFF
|
138
|
-
end
|
139
|
-
|
140
|
-
def murmur_fmix(h)
|
141
|
-
h ^= h >> 16
|
142
|
-
h = (h * 0x85ebca6b) & 0xFFFFFFFF
|
143
|
-
h ^= h >> 13
|
144
|
-
h = (h * 0xc2b2ae35) & 0xFFFFFFFF
|
145
|
-
h ^ (h >> 16)
|
146
|
-
end
|
147
95
|
end
|
148
96
|
end
|
149
97
|
end
|
data/lib/rumale/pipeline/feature_union.rb
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/validation'
|
4
|
+
require 'rumale/base/base_estimator'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module Pipeline
|
8
|
+
# FeatureUnion is a class that concatenates the results of multiple transformers.
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# fu = Rumale::Pipeline::FeatureUnion.new(
|
12
|
+
# transformers: {
|
13
|
+
# 'rbf': Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 96, random_seed: 1),
|
14
|
+
# 'pca': Rumale::Decomposition::PCA.new(n_components: 32)
|
15
|
+
# }
|
16
|
+
# )
|
17
|
+
# fu.fit(training_samples, training_labels)
|
18
|
+
# results = fu.transform(testing_samples)
|
19
|
+
#
|
20
|
+
# # > p results.shape[1]
|
21
|
+
# # > 128
|
22
|
+
#
|
23
|
+
class FeatureUnion
|
24
|
+
include Base::BaseEstimator
|
25
|
+
include Validation
|
26
|
+
|
27
|
+
# Return the transformers
|
28
|
+
# @return [Hash]
|
29
|
+
attr_reader :transformers
|
30
|
+
|
31
|
+
# Create a new feature union.
|
32
|
+
#
|
33
|
+
# @param transformers [Hash] List of transformers. The order of transforms follows the insertion order of hash keys.
|
34
|
+
def initialize(transformers:)
|
35
|
+
check_params_type(Hash, transformers: transformers)
|
36
|
+
@params = {}
|
37
|
+
@transformers = transformers
|
38
|
+
end
|
39
|
+
|
40
|
+
# Fit the model with given training data.
|
41
|
+
#
|
42
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
|
43
|
+
# @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
|
44
|
+
# @return [FeatureUnion] The learned feature union itself.
|
45
|
+
def fit(x, y = nil)
|
46
|
+
@transformers.each { |_k, t| t.fit(x, y) }
|
47
|
+
self
|
48
|
+
end
|
49
|
+
|
50
|
+
# Fit the model with training data, and then transform them with the learned model.
|
51
|
+
#
|
52
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
|
53
|
+
# @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
|
54
|
+
# @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
|
55
|
+
def fit_transform(x, y = nil)
|
56
|
+
fit(x, y).transform(x)
|
57
|
+
end
|
58
|
+
|
59
|
+
# Transform the given data with the learned model.
|
60
|
+
#
|
61
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned transformers.
|
62
|
+
# @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
|
63
|
+
def transform(x)
|
64
|
+
z = @transformers.values.map { |t| t.transform(x) }
|
65
|
+
Numo::NArray.hstack(z)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
data/lib/rumale/version.rb
CHANGED
data/rumale.gemspec
CHANGED
@@ -45,6 +45,7 @@ Gem::Specification.new do |spec|
|
|
45
45
|
spec.required_ruby_version = '>= 2.3'
|
46
46
|
|
47
47
|
spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
|
48
|
+
spec.add_runtime_dependency 'mmh3', '~> 0.1'
|
48
49
|
|
49
50
|
spec.add_development_dependency 'bundler', '~> 2.0'
|
50
51
|
spec.add_development_dependency 'coveralls', '~> 0.8'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rumale
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.17.2
|
4
|
+
version: 0.17.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-02-
|
11
|
+
date: 2020-02-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: numo-narray
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 0.9.1
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: mmh3
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0.1'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0.1'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: bundler
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -247,6 +261,7 @@ files:
|
|
247
261
|
- lib/rumale/optimizer/sgd.rb
|
248
262
|
- lib/rumale/optimizer/yellow_fin.rb
|
249
263
|
- lib/rumale/pairwise_metric.rb
|
264
|
+
- lib/rumale/pipeline/feature_union.rb
|
250
265
|
- lib/rumale/pipeline/pipeline.rb
|
251
266
|
- lib/rumale/polynomial_model/base_factorization_machine.rb
|
252
267
|
- lib/rumale/polynomial_model/factorization_machine_classifier.rb
|