rumale-feature_extraction 0.24.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE.txt +27 -0
- data/README.md +34 -0
- data/lib/rumale/feature_extraction/feature_hasher.rb +100 -0
- data/lib/rumale/feature_extraction/hash_vectorizer.rb +157 -0
- data/lib/rumale/feature_extraction/tfidf_transformer.rb +111 -0
- data/lib/rumale/feature_extraction/version.rb +10 -0
- data/lib/rumale/feature_extraction.rb +8 -0
- metadata +101 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7927d78c3c8294fdaba1f509c5bfa0d3d5960d5813cba42aaa5c2765317064dd
|
4
|
+
data.tar.gz: 43422862894245c61da3b8973a3991cccf80d87f901fbab635077a00fe7670d8
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 9127e6789c784861dc6302cbd69b6abc6afc841e8ba22ef0e4b1b42cd0a575433fe79e37c3797eee632560cf7d0a7585aee1e2a28ee7d1df8ae770c5be2f587f
|
7
|
+
data.tar.gz: a0455a7c16fc510d2428d9476e22d883bb1377779552daba8243ce20bdd332df69be4f3143aa1d4abe2bc4b319210c06872ff6239a25565fa13da82298113b13
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
Copyright (c) 2022 Atsushi Tatsuma
|
2
|
+
All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use in source and binary forms, with or without
|
5
|
+
modification, are permitted provided that the following conditions are met:
|
6
|
+
|
7
|
+
* Redistributions of source code must retain the above copyright notice, this
|
8
|
+
list of conditions and the following disclaimer.
|
9
|
+
|
10
|
+
* Redistributions in binary form must reproduce the above copyright notice,
|
11
|
+
this list of conditions and the following disclaimer in the documentation
|
12
|
+
and/or other materials provided with the distribution.
|
13
|
+
|
14
|
+
* Neither the name of the copyright holder nor the names of its
|
15
|
+
contributors may be used to endorse or promote products derived from
|
16
|
+
this software without specific prior written permission.
|
17
|
+
|
18
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
19
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
20
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
21
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
22
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
23
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
24
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
25
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
26
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
27
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
data/README.md
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# Rumale::FeatureExtraction
|
2
|
+
|
3
|
+
[Gem Version](https://badge.fury.io/rb/rumale-feature_extraction)
|
4
|
+
[BSD 3-Clause License](https://github.com/yoshoku/rumale/blob/main/rumale-feature_extraction/LICENSE.txt)
|
5
|
+
[Documentation](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction.html)
|
6
|
+
|
7
|
+
Rumale is a machine learning library in Ruby.
|
8
|
+
Rumale::FeatureExtraction provides feature extraction methods,
|
9
|
+
such as TF-IDF and feature hashing,
|
10
|
+
with Rumale interface.
|
11
|
+
|
12
|
+
## Installation
|
13
|
+
|
14
|
+
Add this line to your application's Gemfile:
|
15
|
+
|
16
|
+
```ruby
|
17
|
+
gem 'rumale-feature_extraction'
|
18
|
+
```
|
19
|
+
|
20
|
+
And then execute:
|
21
|
+
|
22
|
+
$ bundle install
|
23
|
+
|
24
|
+
Or install it yourself as:
|
25
|
+
|
26
|
+
$ gem install rumale-feature_extraction
|
27
|
+
|
28
|
+
## Documentation
|
29
|
+
|
30
|
+
- [Rumale API Documentation - FeatureExtraction](https://yoshoku.github.io/rumale/doc/Rumale/FeatureExtraction.html)
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'mmh3'
|
4
|
+
|
5
|
+
require 'rumale/base/estimator'
|
6
|
+
require 'rumale/base/transformer'
|
7
|
+
|
8
|
+
module Rumale
|
9
|
+
module FeatureExtraction
|
10
|
+
# Encode array of feature-value hash to vectors with feature hashing (hashing trick).
|
11
|
+
# This encoder turns array of mappings (Array<Hash>) with pairs of feature names and values into Numo::NArray.
|
12
|
+
# This encoder employs signed 32-bit Murmurhash3 as the hash function.
|
13
|
+
#
|
14
|
+
# @example
|
15
|
+
# require 'rumale/feature_extraction/feature_hasher'
|
16
|
+
#
|
17
|
+
# encoder = Rumale::FeatureExtraction::FeatureHasher.new(n_features: 10)
|
18
|
+
# x = encoder.transform([
|
19
|
+
# { dog: 1, cat: 2, elephant: 4 },
|
20
|
+
# { dog: 2, run: 5 }
|
21
|
+
# ])
|
22
|
+
#
|
23
|
+
# # > pp x
|
24
|
+
# # Numo::DFloat#shape=[2,10]
|
25
|
+
# # [[0, 0, -4, -1, 0, 0, 0, 0, 0, 2],
|
26
|
+
# # [0, 0, 0, -2, -5, 0, 0, 0, 0, 0]]
|
27
|
+
class FeatureHasher < ::Rumale::Base::Estimator
|
28
|
+
include ::Rumale::Base::Transformer
|
29
|
+
|
30
|
+
# Create a new encoder for converting array of hash consisting of feature names and values to vectors
|
31
|
+
# with feature hashing algorithm.
|
32
|
+
#
|
33
|
+
# @param n_features [Integer] The number of features of encoded samples.
|
34
|
+
# @param alternate_sign [Boolean] The flag indicating whether to reflect the sign of the hash value to the feature value.
|
35
|
+
def initialize(n_features: 1024, alternate_sign: true)
  super()
  # Both options are kept in @params under their own names.
  @params = { n_features: n_features, alternate_sign: alternate_sign }
end
|
42
|
+
|
43
|
+
# This method does not do anything. The encoder does not require training.
|
44
|
+
#
|
45
|
+
# @overload fit(x) -> FeatureHasher
|
46
|
+
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
47
|
+
# @return [FeatureHasher]
|
48
|
+
def fit(_x = nil, _y = nil)
  # Nothing is learned here: both arguments are ignored and the receiver is returned.
  self
end
|
51
|
+
|
52
|
+
# Encode given the array of feature-value hash.
|
53
|
+
# This method has the same output as the transform method
|
54
|
+
# because the encoder does not require training.
|
55
|
+
#
|
56
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
57
|
+
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
58
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
59
|
+
def fit_transform(x, _y = nil)
  # Encode the samples with whatever object fit hands back.
  encoder = fit(x)
  encoder.transform(x)
end
|
62
|
+
|
63
|
+
# Encode given the array of feature-value hash.
|
64
|
+
#
|
65
|
+
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
66
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
67
|
+
def transform(x)
  # A single Hash is treated as a one-sample batch.
  x = [x] unless x.is_a?(Array)
  n_samples = x.size

  z = Numo::DFloat.zeros(n_samples, n_features)

  x.each_with_index do |f, i|
    f.each do |k, v|
      # A String value is encoded as an indicator (value 1) under the name "key=value".
      if v.is_a?(String)
        k = "#{k}=#{v}"
        val = 1
      else
        val = v
      end
      next if val.zero?

      h = Mmh3.hash32(k)
      fid = h.abs % n_features
      # With alternate_sign enabled, a negative hash value flips the sign of the feature value.
      val = -val if alternate_sign? && h < 0
      # Distinct features of one sample may hash to the same column. Add their
      # values together instead of letting the last one overwrite the others.
      z[i, fid] += val
    end
  end

  z
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
# Value stored under :n_features in @params.
def n_features
  @params[:n_features]
end

# Value stored under :alternate_sign in @params.
def alternate_sign?
  @params[:alternate_sign]
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
@@ -0,0 +1,157 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
|
6
|
+
module Rumale
|
7
|
+
module FeatureExtraction
|
8
|
+
# Encode array of feature-value hash to vectors.
|
9
|
+
# This encoder turns array of mappings (Array<Hash>) with pairs of feature names and values into Numo::NArray.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/feature_extraction/hash_vectorizer'
|
13
|
+
#
|
14
|
+
# encoder = Rumale::FeatureExtraction::HashVectorizer.new
|
15
|
+
# x = encoder.fit_transform([
|
16
|
+
# { foo: 1, bar: 2 },
|
17
|
+
# { foo: 3, baz: 1 }
|
18
|
+
# ])
|
19
|
+
#
|
20
|
+
# # > pp x
|
21
|
+
# # Numo::DFloat#shape=[2,3]
|
22
|
+
# # [[2, 0, 1],
|
23
|
+
# # [0, 1, 3]]
|
24
|
+
#
|
25
|
+
# x = encoder.fit_transform([
|
26
|
+
# { city: 'Dubai', temperature: 33 },
|
27
|
+
# { city: 'London', temperature: 12 },
|
28
|
+
# { city: 'San Francisco', temperature: 18 }
|
29
|
+
# ])
|
30
|
+
#
|
31
|
+
# # > pp x
|
32
|
+
# # Numo::DFloat#shape=[3,4]
|
33
|
+
# # [[1, 0, 0, 33],
|
34
|
+
# # [0, 1, 0, 12],
|
35
|
+
# # [0, 0, 1, 18]]
|
36
|
+
# # > pp encoder.inverse_transform(x)
|
37
|
+
# # [{:city=>"Dubai", :temperature=>33.0},
|
38
|
+
# # {:city=>"London", :temperature=>12.0},
|
39
|
+
# # {:city=>"San Francisco", :temperature=>18.0}]
|
40
|
+
class HashVectorizer < ::Rumale::Base::Estimator
|
41
|
+
include ::Rumale::Base::Transformer
|
42
|
+
|
43
|
+
# Return the list of feature names.
|
44
|
+
# @return [Array] (size: [n_features])
|
45
|
+
attr_reader :feature_names
|
46
|
+
|
47
|
+
# Return the hash consisting of pairs of feature names and indices.
|
48
|
+
# @return [Hash] (size: [n_features])
|
49
|
+
attr_reader :vocabulary
|
50
|
+
|
51
|
+
# Create a new encoder for converting array of hash consisting of feature names and values to vectors.
|
52
|
+
#
|
53
|
+
# @param separator [String] The separator string used for constructing new feature names for categorical feature.
|
54
|
+
# @param sort [Boolean] The flag indicating whether to sort feature names.
|
55
|
+
def initialize(separator: '=', sort: true)
  super()
  # Both options are kept in @params under their own names.
  @params = { separator: separator, sort: sort }
end
|
62
|
+
|
63
|
+
# Fit the encoder with given training data.
|
64
|
+
#
|
65
|
+
# @overload fit(x) -> HashVectorizer
|
66
|
+
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
67
|
+
# @return [HashVectorizer]
|
68
|
+
def fit(x, _y = nil)
  # Accept a single Hash the same way transform does; without this,
  # iterating a bare Hash would yield [key, value] pairs instead of samples.
  x = [x] unless x.is_a?(Array)

  @feature_names = []
  @vocabulary = {}

  x.each do |f|
    f.each do |k, v|
      # A String value becomes its own indicator feature named "<key><separator><value>".
      k = "#{k}#{separator}#{v}".to_sym if v.is_a?(String)
      next if @vocabulary.key?(k)

      # First occurrence: the column index is the number of names seen so far.
      @feature_names.push(k)
      @vocabulary[k] = @vocabulary.size
    end
  end

  if sort_feature?
    # Re-number the columns so that they follow the sorted name order.
    @feature_names.sort!
    @feature_names.each_with_index { |k, i| @vocabulary[k] = i }
  end

  self
end
|
89
|
+
|
90
|
+
# Fit the encoder with given training data, then return encoded data.
|
91
|
+
#
|
92
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
93
|
+
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
94
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
95
|
+
def fit_transform(x, _y = nil)
  # Learn the vocabulary first, then encode the same samples with the object fit returns.
  encoder = fit(x)
  encoder.transform(x)
end
|
98
|
+
|
99
|
+
# Encode given the array of feature-value hash.
|
100
|
+
#
|
101
|
+
# @param x [Array<Hash>] (shape: [n_samples]) The array of hash consisting of feature names and values.
|
102
|
+
# @return [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
103
|
+
def transform(x)
  # A single Hash is treated as a one-sample batch.
  samples = x.is_a?(Array) ? x : [x]
  z = Numo::DFloat.zeros(samples.size, @vocabulary.size)

  samples.each_with_index do |sample, row|
    sample.each do |name, value|
      if value.is_a?(String)
        # A String value is looked up under the combined name and stored as 1.
        name = "#{name}#{separator}#{value}".to_sym
        value = 1
      end
      # Names missing from the vocabulary are skipped.
      next unless @vocabulary.key?(name)

      z[row, @vocabulary[name]] = value
    end
  end

  z
end
|
121
|
+
|
122
|
+
# Decode sample matrix to the array of feature-value hash.
|
123
|
+
#
|
124
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The encoded sample array.
|
125
|
+
# @return [Array<Hash>] The array of hash consisting of feature names and values.
|
126
|
+
def inverse_transform(x)
  # One Hash per row of x, built in row order.
  Array.new(x.shape[0]) do |i|
    sample = {}
    x[i, true].each_with_index do |el, j|
      # Zero entries are left out of the reconstructed sample.
      next if el.zero?

      key, val = feature_key_val(@feature_names[j], el)
      sample[key.to_sym] = val
    end
    sample
  end
end
|
140
|
+
|
141
|
+
private
|
142
|
+
|
143
|
+
def feature_key_val(fname, fval)
  # A name that splits into exactly two parts around the separator is returned
  # as that [key, value] pair of Strings; any other name is returned unchanged
  # together with the numeric value.
  parts = fname.to_s.split(separator)
  return parts if parts.size == 2

  [fname, fval]
end
|
147
|
+
|
148
|
+
# Value stored under :separator in @params.
def separator
  @params[:separator]
end

# Value stored under :sort in @params.
def sort_feature?
  @params[:sort]
end
|
155
|
+
end
|
156
|
+
end
|
157
|
+
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'rumale/base/estimator'
|
4
|
+
require 'rumale/base/transformer'
|
5
|
+
require 'rumale/utils'
|
6
|
+
|
7
|
+
module Rumale
|
8
|
+
module FeatureExtraction
|
9
|
+
# Transform sample matrix with term frequency (tf) to a normalized tf-idf (inverse document frequency) representation.
|
10
|
+
#
|
11
|
+
# @example
|
12
|
+
# require 'rumale/feature_extraction/hash_vectorizer'
|
13
|
+
# require 'rumale/feature_extraction/tfidf_transformer'
|
14
|
+
#
|
15
|
+
# encoder = Rumale::FeatureExtraction::HashVectorizer.new
|
16
|
+
# x = encoder.fit_transform([
|
17
|
+
# { foo: 1, bar: 2 },
|
18
|
+
# { foo: 3, baz: 1 }
|
19
|
+
# ])
|
20
|
+
#
|
21
|
+
# # > pp x
|
22
|
+
# # Numo::DFloat#shape=[2,3]
|
23
|
+
# # [[2, 0, 1],
|
24
|
+
# # [0, 1, 3]]
|
25
|
+
#
|
26
|
+
# transformer = Rumale::FeatureExtraction::TfidfTransformer.new
|
27
|
+
# x_tfidf = transformer.fit_transform(x)
|
28
|
+
#
|
29
|
+
# # > pp x_tfidf
|
30
|
+
# # Numo::DFloat#shape=[2,3]
|
31
|
+
# # [[0.959056, 0, 0.283217],
|
32
|
+
# # [0, 0.491506, 0.870874]]
|
33
|
+
#
|
34
|
+
# *Reference*
|
35
|
+
# - Manning, C D., Raghavan, P., and Schutze, H., "Introduction to Information Retrieval," Cambridge University Press., 2008.
|
36
|
+
class TfidfTransformer < ::Rumale::Base::Estimator
|
37
|
+
include ::Rumale::Base::Transformer
|
38
|
+
|
39
|
+
# Return the vector consisting of inverse document frequencies.
|
40
|
+
# @return [Numo::DFloat] (shape: [n_features])
|
41
|
+
attr_reader :idf
|
42
|
+
|
43
|
+
# Create a new transformer for converting tf vectors to tf-idf vectors.
|
44
|
+
#
|
45
|
+
# @param norm [String] The normalization method to be used ('l1', 'l2' and 'none').
|
46
|
+
# @param use_idf [Boolean] The flag indicating whether to use inverse document frequency weighting.
|
47
|
+
# @param smooth_idf [Boolean] The flag indicating whether to apply idf smoothing by log((n_samples + 1) / (df + 1)) + 1.
|
48
|
+
# @param sublinear_tf [Boolean] The flag indicating whether to perform sublinear tf scaling by 1 + log(tf).
|
49
|
+
def initialize(norm: 'l2', use_idf: true, smooth_idf: false, sublinear_tf: false)
  super()
  # All four options are kept in @params under their own names.
  @params = { norm: norm, use_idf: use_idf, smooth_idf: smooth_idf, sublinear_tf: sublinear_tf }
end
|
58
|
+
|
59
|
+
# Calculate the inverse document frequency for weighting.
|
60
|
+
#
|
61
|
+
# @overload fit(x) -> TfidfTransformer
|
62
|
+
#
|
63
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate the idf values.
|
64
|
+
# @return [TfidfTransformer]
|
65
|
+
def fit(x, _y = nil)
  # Without idf weighting there is nothing to learn.
  return self unless @params[:use_idf]

  n_samples = x.shape[0]
  # Document frequency: for each column, the number of rows holding a positive value.
  df = x.class.cast(x.gt(0.0).count(0))

  if @params[:smooth_idf]
    df += 1
    n_samples += 1
  else
    # A column with no positive entry has df = 0. Dividing by it gives an
    # infinite idf, and transform then multiplies that by a zero tf, which
    # yields NaN. Count such a column as occurring in one sample so that
    # its idf stays finite.
    unseen = df.eq(0)
    df[unseen] = 1 if unseen.any?
  end

  @idf = Numo::NMath.log(n_samples / df) + 1

  self
end
|
80
|
+
|
81
|
+
# Calculate the idf values, and then transform samples to the tf-idf representation.
|
82
|
+
#
|
83
|
+
# @overload fit_transform(x) -> Numo::DFloat
|
84
|
+
#
|
85
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to calculate idf and be transformed to tf-idf representation.
|
86
|
+
# @return [Numo::DFloat] The transformed samples.
|
87
|
+
def fit_transform(x, _y = nil)
  # Compute the idf values first, then weight the same samples with the object fit returns.
  transformer = fit(x)
  transformer.transform(x)
end
|
90
|
+
|
91
|
+
# Perform transforming the given samples to the tf-idf representation.
|
92
|
+
#
|
93
|
+
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to be transformed.
|
94
|
+
# @return [Numo::DFloat] The transformed samples.
|
95
|
+
def transform(x)
  # Work on a copy so the caller's matrix is not modified.
  z = x.dup

  if @params[:sublinear_tf]
    # Replace every non-zero tf by log(tf) + 1; zero entries stay zero.
    nonzero = z.ne(0)
    z[nonzero] = Numo::NMath.log(z[nonzero]) + 1
  end

  z *= @idf if @params[:use_idf]

  # Only 'l2' and 'l1' trigger normalization; any other setting returns z as is.
  norm_name = %w[l2 l1].find { |candidate| candidate == @params[:norm] }
  norm_name ? ::Rumale::Utils.normalize(z, norm_name) : z
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
@@ -0,0 +1,8 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'numo/narray'
|
4
|
+
|
5
|
+
require_relative 'feature_extraction/feature_hasher'
|
6
|
+
require_relative 'feature_extraction/hash_vectorizer'
|
7
|
+
require_relative 'feature_extraction/tfidf_transformer'
|
8
|
+
require_relative 'feature_extraction/version'
|
metadata
ADDED
@@ -0,0 +1,101 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: rumale-feature_extraction
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.24.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- yoshoku
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-12-31 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mmh3
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: numo-narray
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 0.9.1
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 0.9.1
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rumale-core
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.24.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.24.0
|
55
|
+
description: |
|
56
|
+
Rumale::FeatureExtraction provides feature extraction methods,
|
57
|
+
such as TF-IDF and feature hashing,
|
58
|
+
with Rumale interface.
|
59
|
+
email:
|
60
|
+
- yoshoku@outlook.com
|
61
|
+
executables: []
|
62
|
+
extensions: []
|
63
|
+
extra_rdoc_files: []
|
64
|
+
files:
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.md
|
67
|
+
- lib/rumale/feature_extraction.rb
|
68
|
+
- lib/rumale/feature_extraction/feature_hasher.rb
|
69
|
+
- lib/rumale/feature_extraction/hash_vectorizer.rb
|
70
|
+
- lib/rumale/feature_extraction/tfidf_transformer.rb
|
71
|
+
- lib/rumale/feature_extraction/version.rb
|
72
|
+
homepage: https://github.com/yoshoku/rumale
|
73
|
+
licenses:
|
74
|
+
- BSD-3-Clause
|
75
|
+
metadata:
|
76
|
+
homepage_uri: https://github.com/yoshoku/rumale
|
77
|
+
source_code_uri: https://github.com/yoshoku/rumale/tree/main/rumale-feature_extraction
|
78
|
+
changelog_uri: https://github.com/yoshoku/rumale/blob/main/CHANGELOG.md
|
79
|
+
documentation_uri: https://yoshoku.github.io/rumale/doc/
|
80
|
+
rubygems_mfa_required: 'true'
|
81
|
+
post_install_message:
|
82
|
+
rdoc_options: []
|
83
|
+
require_paths:
|
84
|
+
- lib
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0'
|
90
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
requirements:
|
92
|
+
- - ">="
|
93
|
+
- !ruby/object:Gem::Version
|
94
|
+
version: '0'
|
95
|
+
requirements: []
|
96
|
+
rubygems_version: 3.3.26
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: Rumale::FeatureExtraction provides feature extraction methods with Rumale
|
100
|
+
interface.
|
101
|
+
test_files: []
|