libsvmloader 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.coveralls.yml +1 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +6 -0
- data/.travis.yml +6 -1
- data/HISTORY.md +6 -0
- data/README.md +30 -1
- data/lib/libsvmloader.rb +71 -51
- data/lib/libsvmloader/version.rb +3 -1
- data/libsvmloader.gemspec +2 -3
- metadata +12 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e887dc00c4c665e3bf78221650a6740440cf9d07549c01bc47ccfb6940825ddd
|
4
|
+
data.tar.gz: bff658751c419564ccbac3896307a2a1799d46ef7370fd5528e641f35073089b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab4062fbd21fdddb5b139e6fef498234e04588e858f172042265f2f2ef33f9f06e60f147e52f56a022a12a2e10d527f3d0c6c64d815651a32cd6741ef488dadd
|
7
|
+
data.tar.gz: 8ca0ffb36321df79bb6fd018d2e45d7a6c600371d852ef70ffad644eebcb7273dfb011f34596c3492cd9a1c42b13516ff8b8341df6d2d996e0d660613d3c809a
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: travis-ci
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
data/HISTORY.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# 0.2.0
|
2
|
+
## Breaking changes
|
3
|
+
|
4
|
+
LibSVMLoader has been modified to return the samples and labels of a dataset as Ruby Arrays.
|
5
|
+
Thus, LibSVMLoader does not require NMatrix.
|
6
|
+
|
1
7
|
# 0.1.3
|
2
8
|
- Changed the visibility of protected methods to private.
|
3
9
|
- Fixed the description in the gemspec file.
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# LibSVMLoader
|
2
2
|
|
3
3
|
[Build Status](https://travis-ci.org/yoshoku/LibSVMLoader)
|
4
|
+
[Coverage Status](https://coveralls.io/github/yoshoku/LibSVMLoader?branch=master)
|
4
5
|
[Gem Version](https://badge.fury.io/rb/libsvmloader)
|
5
6
|
[License](https://github.com/yoshoku/LibSVMLoader/blob/master/LICENSE.txt)
|
6
7
|
|
@@ -32,10 +33,38 @@ samples, labels = LibSVMLoader.load_libsvm_file('foo.t')
|
|
32
33
|
LibSVMLoader.dump_libsvm_file(samples, labels, 'bar.t')
|
33
34
|
|
34
35
|
# for regression task
|
35
|
-
samples, target_variables = LibSVMLoader.load_libsvm_file('foo.t', label_dtype:
|
36
|
+
samples, target_variables = LibSVMLoader.load_libsvm_file('foo.t', label_dtype: 'float')
|
36
37
|
LibSVMLoader.dump_libsvm_file(samples, target_variables, 'bar.t')
|
37
38
|
```
|
38
39
|
|
40
|
+
When using LibSVMLoader with Numo::NArray:
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
require 'libsvmloader'
|
44
|
+
require 'numo/narray'
|
45
|
+
|
46
|
+
samples, labels = LibSVMLoader.load_libsvm_file('foo.t')
|
47
|
+
|
48
|
+
samples_na = Numo::NArray[*samples]
|
49
|
+
labels_na = Numo::NArray[*labels]
|
50
|
+
|
51
|
+
LibSVMLoader.dump_libsvm_file(samples_na.to_a, labels_na.to_a, 'bar.t')
|
52
|
+
```
|
53
|
+
|
54
|
+
When using LibSVMLoader with NMatrix:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
require 'libsvmloader'
|
58
|
+
require 'nmatrix/nmatrix'
|
59
|
+
|
60
|
+
samples, labels = LibSVMLoader.load_libsvm_file('foo.t')
|
61
|
+
|
62
|
+
samples_nm = N[*samples]
|
63
|
+
labels_nm = N[*labels]
|
64
|
+
|
65
|
+
LibSVMLoader.dump_libsvm_file(samples_nm.to_a, labels_nm.to_a, 'bar.t')
|
66
|
+
```
|
67
|
+
|
39
68
|
## Development
|
40
69
|
|
41
70
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/libsvmloader.rb
CHANGED
@@ -1,92 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
3
|
require 'libsvmloader/version'
|
3
|
-
require '
|
4
|
+
require 'csv'
|
4
5
|
|
5
6
|
# LibSVMLoader loads (and dumps) dataset file with the libsvm file format.
|
6
7
|
class LibSVMLoader
|
7
8
|
class << self
|
8
|
-
# Load a dataset with the libsvm file format
|
9
|
+
# Load a dataset with the libsvm file format.
|
9
10
|
#
|
10
|
-
# @param filename [String]
|
11
|
+
# @param filename [String] Path to a dataset file.
|
11
12
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
12
|
-
# @param
|
13
|
-
# @param
|
14
|
-
# @param value_dtype [Symbol] The data type of the NMatrix consisting of feature vectors.
|
13
|
+
# @param label_dtype [String] Data type of labels or target values ('int', 'float', 'complex').
|
14
|
+
# @param value_dtype [String] Data type of feature vectors ('int', 'float', 'complex').
|
15
15
|
#
|
16
|
-
# @return [Array<
|
16
|
+
# @return [Array<Array>]
|
17
17
|
# Returns array containing the (n_samples x n_features) matrix for feature vectors
|
18
|
-
# and (n_samples
|
19
|
-
def load_libsvm_file(filename, zero_based: false,
|
20
|
-
ftvecs = []
|
18
|
+
# and (n_samples) vector for labels or target values.
|
19
|
+
def load_libsvm_file(filename, zero_based: false, label_dtype: 'int', value_dtype: 'float')
|
21
20
|
labels = []
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
ftvecs = []
|
22
|
+
maxids = []
|
23
|
+
label_class = parse_dtype(label_dtype)
|
24
|
+
value_class = parse_dtype(value_dtype)
|
25
|
+
CSV.foreach(filename, col_sep: "\s", headers: false) do |row|
|
26
|
+
label, ftvec, maxid = parse_libsvm_row(row, zero_based, label_class, value_class)
|
25
27
|
labels.push(label)
|
26
28
|
ftvecs.push(ftvec)
|
27
|
-
|
29
|
+
maxids.push(maxid)
|
28
30
|
end
|
29
|
-
[
|
30
|
-
NMatrix.new([labels.size, 1], labels, dtype: label_dtype)]
|
31
|
+
[convert_to_matrix(ftvecs, maxids.max + 1, value_class), labels]
|
31
32
|
end
|
32
33
|
|
33
34
|
# Dump the dataset with the libsvm file format.
|
34
35
|
#
|
35
|
-
# @param data [
|
36
|
-
# @param labels [
|
37
|
-
# @param filename [String]
|
36
|
+
# @param data [Array] (n_samples x n_features) matrix consisting of feature vectors.
|
37
|
+
# @param labels [Array] (n_samples) vector consisting of labels or target values.
|
38
|
+
# @param filename [String] Path to the output libsvm file.
|
38
39
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
39
40
|
def dump_libsvm_file(data, labels, filename, zero_based: false)
|
40
|
-
n_samples = [data.
|
41
|
-
|
42
|
-
|
41
|
+
n_samples = [data.size, labels.size].min
|
42
|
+
label_format = detect_format(labels.first)
|
43
|
+
value_format = detect_format(data.flatten.first)
|
43
44
|
File.open(filename, 'w') do |file|
|
44
|
-
n_samples.times
|
45
|
-
file.puts(dump_libsvm_line(labels[n], data.row(n),
|
46
|
-
label_type, value_type, zero_based))
|
47
|
-
end
|
45
|
+
n_samples.times { |n| file.puts(dump_libsvm_line(labels[n], data[n], label_format, value_format, zero_based)) }
|
48
46
|
end
|
49
47
|
end
|
50
48
|
|
51
49
|
private
|
52
50
|
|
53
|
-
def
|
54
|
-
|
55
|
-
|
56
|
-
ftvec = tokens.map do |el|
|
51
|
+
def parse_libsvm_row(row, zero_based, label_type, value_type)
|
52
|
+
label = convert_type(row.shift, label_type)
|
53
|
+
ftvec = row.map do |el|
|
57
54
|
idx, val = el.split(':')
|
58
|
-
idx
|
59
|
-
[idx, val.to_f]
|
55
|
+
[idx.to_i - (zero_based == false ? 1 : 0), convert_type(val, value_type)]
|
60
56
|
end
|
61
|
-
max_idx = ftvec.map { |
|
62
|
-
max_idx ||= 0
|
57
|
+
max_idx = ftvec.map { |idx, _val| idx }.max || 0
|
63
58
|
[label, ftvec, max_idx]
|
64
59
|
end
|
65
60
|
|
66
|
-
def
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
61
|
+
def parse_dtype(dtype)
|
62
|
+
case dtype.to_s
|
63
|
+
when /^(int)/i
|
64
|
+
:int
|
65
|
+
when /^(float)/i
|
66
|
+
:float
|
67
|
+
when /^(complex)/i
|
68
|
+
:complex
|
69
|
+
else
|
70
|
+
:string
|
74
71
|
end
|
75
|
-
mat
|
76
72
|
end
|
77
73
|
|
78
|
-
def
|
74
|
+
def convert_type(value, dtype)
|
75
|
+
case dtype
|
76
|
+
when :int
|
77
|
+
value.to_i
|
78
|
+
when :float
|
79
|
+
value.to_f
|
80
|
+
when :complex
|
81
|
+
value.to_c
|
82
|
+
else
|
83
|
+
value
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def convert_to_matrix(data, n_features, value_type)
|
88
|
+
z = convert_type(0, value_type)
|
89
|
+
data.map do |ft|
|
90
|
+
vec = Array.new(n_features) { z }
|
91
|
+
ft.each { |idx, val| vec[idx] = val }
|
92
|
+
vec
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def detect_format(data)
|
79
97
|
type = '%s'
|
80
|
-
type = '%d' if
|
81
|
-
type = '%.10g' if
|
98
|
+
type = '%d' if data.is_a?(Integer)
|
99
|
+
type = '%.10g' if data.is_a?(Float)
|
82
100
|
type
|
83
101
|
end
|
84
102
|
|
85
|
-
def dump_libsvm_line(label, ftvec,
|
86
|
-
line = format(
|
87
|
-
ftvec.
|
88
|
-
|
89
|
-
|
103
|
+
def dump_libsvm_line(label, ftvec, label_format, value_format, zero_based)
|
104
|
+
line = format(label_format, label)
|
105
|
+
ftvec.each_with_index do |val, n|
|
106
|
+
unless val.zero?
|
107
|
+
idx = n + (zero_based == false ? 1 : 0)
|
108
|
+
line += format(" %d:#{value_format}", idx, val)
|
109
|
+
end
|
90
110
|
end
|
91
111
|
line
|
92
112
|
end
|
data/lib/libsvmloader/version.rb
CHANGED
data/libsvmloader.gemspec
CHANGED
@@ -14,7 +14,7 @@ LibSVMLoader loads (and dumps) dataset file with the libsvm file format.
|
|
14
14
|
MSG
|
15
15
|
spec.description = <<MSG
|
16
16
|
LibSVMLoader is a class that loads (and dumps) dataset file with the libsvm file format.
|
17
|
-
The
|
17
|
+
The feature vectors and labels of dataset are represented by Ruby Array.
|
18
18
|
MSG
|
19
19
|
spec.homepage = 'https://github.com/yoshoku/libsvmloader'
|
20
20
|
spec.license = 'MIT'
|
@@ -28,9 +28,8 @@ MSG
|
|
28
28
|
|
29
29
|
spec.required_ruby_version = '>= 2.1'
|
30
30
|
|
31
|
-
spec.add_runtime_dependency 'nmatrix', '~> 0.2.3'
|
32
|
-
|
33
31
|
spec.add_development_dependency 'bundler', '~> 1.15'
|
32
|
+
spec.add_development_dependency 'coveralls', '~> 0.8'
|
34
33
|
spec.add_development_dependency 'rake', '~> 10.0'
|
35
34
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
36
35
|
end
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libsvmloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
20
|
-
type: :
|
19
|
+
version: '1.15'
|
20
|
+
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: '1.15'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: coveralls
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0.8'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0.8'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -68,7 +68,7 @@ dependencies:
|
|
68
68
|
version: '3.0'
|
69
69
|
description: |
|
70
70
|
LibSVMLoader is a class that loads (and dumps) dataset file with the libsvm file format.
|
71
|
-
The
|
71
|
+
The feature vectors and labels of dataset are represented by Ruby Array.
|
72
72
|
email:
|
73
73
|
- yoshoku@outlook.com
|
74
74
|
executables:
|
@@ -76,6 +76,7 @@ executables:
|
|
76
76
|
extensions: []
|
77
77
|
extra_rdoc_files: []
|
78
78
|
files:
|
79
|
+
- ".coveralls.yml"
|
79
80
|
- ".gitignore"
|
80
81
|
- ".rspec"
|
81
82
|
- ".rubocop.yml"
|
@@ -112,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
113
|
version: '0'
|
113
114
|
requirements: []
|
114
115
|
rubyforge_project:
|
115
|
-
rubygems_version: 2.
|
116
|
+
rubygems_version: 2.7.6
|
116
117
|
signing_key:
|
117
118
|
specification_version: 4
|
118
119
|
summary: LibSVMLoader loads (and dumps) dataset file with the libsvm file format.
|