rumale 0.17.2 → 0.17.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09585216da740231269c5675c48a8fa2ee3a8aba3f5c5b7d671429d113aaa4ba'
4
- data.tar.gz: 66eb473c718f6a03938db19df06694373c781e098d68e2f2c20162865dc4f9f8
3
+ metadata.gz: 68b01301c9e7b9aa9d98f0a252e87fc0c5e5e888379e347660a76a63ae7f86a4
4
+ data.tar.gz: dd464110ff410705a20fa2446c884ed039b9f0ea3febfc4cd12ef8d6b542b736
5
5
  SHA512:
6
- metadata.gz: d68eaa297116c4b834cbf2745355d07e4e4b0e50b170c631c417f623d7a0e75a515389903b6e2cb6f22355f9cf5eded4968eac5c7f216f5a98698bb5283c2e00
7
- data.tar.gz: ddb3be08dc88cc99f5c7086e8bea83496f2644316d382f052a954ce6d1056bc2aabfbd2664ff572a55fdb1017a9bbb0d790d4d83b3788b781d133a97d26bd92e
6
+ metadata.gz: d95cf5140e5846431b812867a64927f51ede6f6a925ffdc3a43f3ae1add1ce33756ad6481ca7645edeed56084fe0f8d294f5032364ff664cae19e9edcdaac57e
7
+ data.tar.gz: 620bd2099ddd5ce14ff9525062938740835fe9714f1c42023f7782e963a96b423bba4921e35d134877aada0d05189d5213b90e5556b50799dadfebdeeaf59eba
@@ -1,3 +1,7 @@
1
+ # 0.17.3
2
+ - Add pipeline class for [FeatureUnion](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/FeatureUnion.html).
3
+ - Fix to use mmh3 gem for generating hash values in [FeatureHasher](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction/FeatureHasher.html).
4
+
1
5
  # 0.17.2
2
6
  - Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
3
7
  - Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class considering that array of hash is given to HashVectorizer.
@@ -1,4 +1,4 @@
1
- Copyright (c) 2017-2019 Atsushi Tatsuma
1
+ Copyright (c) 2017-2020 Atsushi Tatsuma
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
@@ -25,6 +25,7 @@ require 'rumale/optimizer/adam'
25
25
  require 'rumale/optimizer/nadam'
26
26
  require 'rumale/optimizer/yellow_fin'
27
27
  require 'rumale/pipeline/pipeline'
28
+ require 'rumale/pipeline/feature_union'
28
29
  require 'rumale/kernel_approximation/rbf'
29
30
  require 'rumale/kernel_approximation/nystroem'
30
31
  require 'rumale/linear_model/base_linear_model'
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'mmh3'
3
4
  require 'rumale/base/base_estimator'
4
5
  require 'rumale/base/transformer'
5
6
 
@@ -72,7 +73,7 @@ module Rumale
72
73
  val = v.is_a?(String) ? 1 : v
73
74
  next if val.zero?
74
75
 
75
- h = murmur_hash(k.to_s)
76
+ h = Mmh3.hash32(k)
76
77
  fid = h.abs % n_features
77
78
  val *= h >= 0 ? 1 : -1 if alternate_sign?
78
79
  z[i, fid] = val
@@ -91,59 +92,6 @@ module Rumale
91
92
  def alternate_sign?
92
93
  @params[:alternate_sign]
93
94
  end
94
-
95
- # MurmurHash3_32
96
- # References:
97
- # - https://en.wikipedia.org/wiki/MurmurHash
98
- # - https://github.com/aappleby/smhasher
99
- def murmur_hash(key_str, seed = 0)
100
- keyb = key_str.bytes
101
- key_len = keyb.size
102
- n_blocks = key_len / 4
103
-
104
- h = seed
105
- (0...n_blocks * 4).step(4) do |bstart|
106
- k = keyb[bstart + 3] << 24 | keyb[bstart + 2] << 16 | keyb[bstart + 1] << 8 | keyb[bstart + 0]
107
- h ^= murmur_scramble(k)
108
- h = murmur_rotl(h, 13)
109
- h = (h * 5 + 0xe6546b64) & 0xFFFFFFFF
110
- end
111
-
112
- tail_id = n_blocks * 4
113
- tail_sz = key_len & 3
114
-
115
- k = 0
116
- k ^= keyb[tail_id + 2] << 16 if tail_sz >= 3
117
- k ^= keyb[tail_id + 1] << 8 if tail_sz >= 2
118
- k ^= keyb[tail_id + 0] if tail_sz >= 1
119
- h ^= murmur_scramble(k) if tail_sz.positive?
120
-
121
- h = murmur_fmix(h ^ key_len)
122
-
123
- if (h & 0x80000000).zero?
124
- h
125
- else
126
- -((h ^ 0xFFFFFFFF) + 1)
127
- end
128
- end
129
-
130
- def murmur_rotl(x, r)
131
- (x << r | x >> (32 - r)) & 0xFFFFFFFF
132
- end
133
-
134
- def murmur_scramble(k)
135
- k = (k * 0xcc9e2d51) & 0xFFFFFFFF
136
- k = murmur_rotl(k, 15)
137
- (k * 0x1b873593) & 0xFFFFFFFF
138
- end
139
-
140
- def murmur_fmix(h)
141
- h ^= h >> 16
142
- h = (h * 0x85ebca6b) & 0xFFFFFFFF
143
- h ^= h >> 13
144
- h = (h * 0xc2b2ae35) & 0xFFFFFFFF
145
- h ^ (h >> 16)
146
- end
147
95
  end
148
96
  end
149
97
  end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/base_estimator'
5
+
6
+ module Rumale
7
+ module Pipeline
8
+ # FeatureUnion is a class that concatenates the results of multiple transformers.
9
+ #
10
+ # @example
11
+ # fu = Rumale::Pipeline::FeatureUnion.new(
12
+ # transformers: {
13
+ # 'rbf': Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 96, random_seed: 1),
14
+ # 'pca': Rumale::Decomposition::PCA.new(n_components: 32)
15
+ # }
16
+ # )
17
+ # fu.fit(training_samples, training_labels)
18
+ # results = fu.transform(testing_samples)
19
+ #
20
+ # # > p results.shape[1]
21
+ # # > 128
22
+ #
23
+ class FeatureUnion
24
+ include Base::BaseEstimator
25
+ include Validation
26
+
27
+ # Return the transformers
28
+ # @return [Hash]
29
+ attr_reader :transformers
30
+
31
+ # Create a new feature union.
32
+ #
33
+ # @param transformers [Hash] List of transformers. The order of transforms follows the insertion order of hash keys.
34
+ def initialize(transformers:)
35
+ check_params_type(Hash, transformers: transformers)
36
+ @params = {}
37
+ @transformers = transformers
38
+ end
39
+
40
+ # Fit the model with given training data.
41
+ #
42
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
43
+ # @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
44
+ # @return [FeatureUnion] The learned feature union itself.
45
+ def fit(x, y = nil)
46
+ @transformers.each { |_k, t| t.fit(x, y) }
47
+ self
48
+ end
49
+
50
+ # Fit the model with training data, and then transform them with the learned model.
51
+ #
52
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
53
+ # @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
54
+ # @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
55
+ def fit_transform(x, y = nil)
56
+ fit(x, y).transform(x)
57
+ end
58
+
59
+ # Transform the given data with the learned model.
60
+ #
61
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned transformers.
62
+ # @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
63
+ def transform(x)
64
+ z = @transformers.values.map { |t| t.transform(x) }
65
+ Numo::NArray.hstack(z)
66
+ end
67
+ end
68
+ end
69
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.17.2'
6
+ VERSION = '0.17.3'
7
7
  end
@@ -45,6 +45,7 @@ Gem::Specification.new do |spec|
45
45
  spec.required_ruby_version = '>= 2.3'
46
46
 
47
47
  spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
48
+ spec.add_runtime_dependency 'mmh3', '~> 0.1'
48
49
 
49
50
  spec.add_development_dependency 'bundler', '~> 2.0'
50
51
  spec.add_development_dependency 'coveralls', '~> 0.8'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.2
4
+ version: 0.17.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-01 00:00:00.000000000 Z
11
+ date: 2020-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: mmh3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -247,6 +261,7 @@ files:
247
261
  - lib/rumale/optimizer/sgd.rb
248
262
  - lib/rumale/optimizer/yellow_fin.rb
249
263
  - lib/rumale/pairwise_metric.rb
264
+ - lib/rumale/pipeline/feature_union.rb
250
265
  - lib/rumale/pipeline/pipeline.rb
251
266
  - lib/rumale/polynomial_model/base_factorization_machine.rb
252
267
  - lib/rumale/polynomial_model/factorization_machine_classifier.rb