rumale 0.17.2 → 0.17.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '09585216da740231269c5675c48a8fa2ee3a8aba3f5c5b7d671429d113aaa4ba'
4
- data.tar.gz: 66eb473c718f6a03938db19df06694373c781e098d68e2f2c20162865dc4f9f8
3
+ metadata.gz: 68b01301c9e7b9aa9d98f0a252e87fc0c5e5e888379e347660a76a63ae7f86a4
4
+ data.tar.gz: dd464110ff410705a20fa2446c884ed039b9f0ea3febfc4cd12ef8d6b542b736
5
5
  SHA512:
6
- metadata.gz: d68eaa297116c4b834cbf2745355d07e4e4b0e50b170c631c417f623d7a0e75a515389903b6e2cb6f22355f9cf5eded4968eac5c7f216f5a98698bb5283c2e00
7
- data.tar.gz: ddb3be08dc88cc99f5c7086e8bea83496f2644316d382f052a954ce6d1056bc2aabfbd2664ff572a55fdb1017a9bbb0d790d4d83b3788b781d133a97d26bd92e
6
+ metadata.gz: d95cf5140e5846431b812867a64927f51ede6f6a925ffdc3a43f3ae1add1ce33756ad6481ca7645edeed56084fe0f8d294f5032364ff664cae19e9edcdaac57e
7
+ data.tar.gz: 620bd2099ddd5ce14ff9525062938740835fe9714f1c42023f7782e963a96b423bba4921e35d134877aada0d05189d5213b90e5556b50799dadfebdeeaf59eba
@@ -1,3 +1,7 @@
1
+ # 0.17.3
2
+ - Add pipeline class for [FeatureUnion](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/FeatureUnion.html).
3
+ - Fix to use mmh3 gem for generating hash value on [FeatureHasher](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction/FeatureHasher.html).
4
+
1
5
  # 0.17.2
2
6
  - Add transformer class for kernel approximation with [Nystroem](https://yoshoku.github.io/rumale/doc/Rumale/KernelApproximation/Nystroem.html) method.
3
7
  - Delete array validation on [Pipeline](https://yoshoku.github.io/rumale/doc/Rumale/Pipeline/Pipeline.html) class considering that array of hash is given to HashVectorizer.
@@ -1,4 +1,4 @@
1
- Copyright (c) 2017-2019 Atsushi Tatsuma
1
+ Copyright (c) 2017-2020 Atsushi Tatsuma
2
2
  All rights reserved.
3
3
 
4
4
  Redistribution and use in source and binary forms, with or without
@@ -25,6 +25,7 @@ require 'rumale/optimizer/adam'
25
25
  require 'rumale/optimizer/nadam'
26
26
  require 'rumale/optimizer/yellow_fin'
27
27
  require 'rumale/pipeline/pipeline'
28
+ require 'rumale/pipeline/feature_union'
28
29
  require 'rumale/kernel_approximation/rbf'
29
30
  require 'rumale/kernel_approximation/nystroem'
30
31
  require 'rumale/linear_model/base_linear_model'
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require 'mmh3'
3
4
  require 'rumale/base/base_estimator'
4
5
  require 'rumale/base/transformer'
5
6
 
@@ -72,7 +73,7 @@ module Rumale
72
73
  val = v.is_a?(String) ? 1 : v
73
74
  next if val.zero?
74
75
 
75
- h = murmur_hash(k.to_s)
76
+ h = Mmh3.hash32(k)
76
77
  fid = h.abs % n_features
77
78
  val *= h >= 0 ? 1 : -1 if alternate_sign?
78
79
  z[i, fid] = val
@@ -91,59 +92,6 @@ module Rumale
91
92
  def alternate_sign?
92
93
  @params[:alternate_sign]
93
94
  end
94
-
95
- # MurmurHash3_32
96
- # References:
97
- # - https://en.wikipedia.org/wiki/MurmurHash
98
- # - https://github.com/aappleby/smhasher
99
- def murmur_hash(key_str, seed = 0)
100
- keyb = key_str.bytes
101
- key_len = keyb.size
102
- n_blocks = key_len / 4
103
-
104
- h = seed
105
- (0...n_blocks * 4).step(4) do |bstart|
106
- k = keyb[bstart + 3] << 24 | keyb[bstart + 2] << 16 | keyb[bstart + 1] << 8 | keyb[bstart + 0]
107
- h ^= murmur_scramble(k)
108
- h = murmur_rotl(h, 13)
109
- h = (h * 5 + 0xe6546b64) & 0xFFFFFFFF
110
- end
111
-
112
- tail_id = n_blocks * 4
113
- tail_sz = key_len & 3
114
-
115
- k = 0
116
- k ^= keyb[tail_id + 2] << 16 if tail_sz >= 3
117
- k ^= keyb[tail_id + 1] << 8 if tail_sz >= 2
118
- k ^= keyb[tail_id + 0] if tail_sz >= 1
119
- h ^= murmur_scramble(k) if tail_sz.positive?
120
-
121
- h = murmur_fmix(h ^ key_len)
122
-
123
- if (h & 0x80000000).zero?
124
- h
125
- else
126
- -((h ^ 0xFFFFFFFF) + 1)
127
- end
128
- end
129
-
130
- def murmur_rotl(x, r)
131
- (x << r | x >> (32 - r)) & 0xFFFFFFFF
132
- end
133
-
134
- def murmur_scramble(k)
135
- k = (k * 0xcc9e2d51) & 0xFFFFFFFF
136
- k = murmur_rotl(k, 15)
137
- (k * 0x1b873593) & 0xFFFFFFFF
138
- end
139
-
140
- def murmur_fmix(h)
141
- h ^= h >> 16
142
- h = (h * 0x85ebca6b) & 0xFFFFFFFF
143
- h ^= h >> 13
144
- h = (h * 0xc2b2ae35) & 0xFFFFFFFF
145
- h ^ (h >> 16)
146
- end
147
95
  end
148
96
  end
149
97
  end
@@ -0,0 +1,69 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'rumale/validation'
4
+ require 'rumale/base/base_estimator'
5
+
6
+ module Rumale
7
+ module Pipeline
8
+ # FeatureUnion is a class that concatenates the results of multiple transformers.
9
+ #
10
+ # @example
11
+ # fu = Rumale::Pipeline::FeatureUnion.new(
12
+ # transformers: {
13
+ # 'rbf': Rumale::KernelApproximation::RBF.new(gamma: 1.0, n_components: 96, random_seed: 1),
14
+ # 'pca': Rumale::Decomposition::PCA.new(n_components: 32)
15
+ # }
16
+ # )
17
+ # fu.fit(training_samples, training_labels)
18
+ # results = fu.transform(testing_samples)
19
+ #
20
+ # # > p results.shape[1]
21
+ # # > 128
22
+ #
23
+ class FeatureUnion
24
+ include Base::BaseEstimator
25
+ include Validation
26
+
27
+ # Return the transformers
28
+ # @return [Hash]
29
+ attr_reader :transformers
30
+
31
+ # Create a new feature union.
32
+ #
33
+ # @param transformers [Hash] Hash of transformers. The transformed results are concatenated in the insertion order of the hash keys.
34
+ def initialize(transformers:)
35
+ check_params_type(Hash, transformers: transformers)
36
+ @params = {}
37
+ @transformers = transformers
38
+ end
39
+
40
+ # Fit the model with given training data.
41
+ #
42
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
43
+ # @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
44
+ # @return [FeatureUnion] The learned feature union itself.
45
+ def fit(x, y = nil)
46
+ @transformers.each { |_k, t| t.fit(x, y) }
47
+ self
48
+ end
49
+
50
+ # Fit the model with training data, and then transform them with the learned model.
51
+ #
52
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the transformers.
53
+ # @param y [Numo::NArray/Nil] (shape: [n_samples, n_outputs]) The target values or labels to be used for fitting the transformers.
54
+ # @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
55
+ def fit_transform(x, y = nil)
56
+ fit(x, y).transform(x)
57
+ end
58
+
59
+ # Transform the given data with the learned model.
60
+ #
61
+ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned transformers.
62
+ # @return [Numo::DFloat] (shape: [n_samples, sum_n_components]) The transformed and concatenated data.
63
+ def transform(x)
64
+ z = @transformers.values.map { |t| t.transform(x) }
65
+ Numo::NArray.hstack(z)
66
+ end
67
+ end
68
+ end
69
+ end
@@ -3,5 +3,5 @@
3
3
  # Rumale is a machine learning library in Ruby.
4
4
  module Rumale
5
5
  # The version of Rumale you are using.
6
- VERSION = '0.17.2'
6
+ VERSION = '0.17.3'
7
7
  end
@@ -45,6 +45,7 @@ Gem::Specification.new do |spec|
45
45
  spec.required_ruby_version = '>= 2.3'
46
46
 
47
47
  spec.add_runtime_dependency 'numo-narray', '>= 0.9.1'
48
+ spec.add_runtime_dependency 'mmh3', '~> 0.1'
48
49
 
49
50
  spec.add_development_dependency 'bundler', '~> 2.0'
50
51
  spec.add_development_dependency 'coveralls', '~> 0.8'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rumale
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.17.2
4
+ version: 0.17.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - yoshoku
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-02-01 00:00:00.000000000 Z
11
+ date: 2020-02-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: numo-narray
@@ -24,6 +24,20 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: mmh3
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: bundler
29
43
  requirement: !ruby/object:Gem::Requirement
@@ -247,6 +261,7 @@ files:
247
261
  - lib/rumale/optimizer/sgd.rb
248
262
  - lib/rumale/optimizer/yellow_fin.rb
249
263
  - lib/rumale/pairwise_metric.rb
264
+ - lib/rumale/pipeline/feature_union.rb
250
265
  - lib/rumale/pipeline/pipeline.rb
251
266
  - lib/rumale/polynomial_model/base_factorization_machine.rb
252
267
  - lib/rumale/polynomial_model/factorization_machine_classifier.rb