libsvmloader 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.coveralls.yml +1 -0
- data/.gitignore +9 -0
- data/.rubocop.yml +6 -0
- data/.travis.yml +6 -1
- data/HISTORY.md +6 -0
- data/README.md +30 -1
- data/lib/libsvmloader.rb +71 -51
- data/lib/libsvmloader/version.rb +3 -1
- data/libsvmloader.gemspec +2 -3
- metadata +12 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e887dc00c4c665e3bf78221650a6740440cf9d07549c01bc47ccfb6940825ddd
|
4
|
+
data.tar.gz: bff658751c419564ccbac3896307a2a1799d46ef7370fd5528e641f35073089b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab4062fbd21fdddb5b139e6fef498234e04588e858f172042265f2f2ef33f9f06e60f147e52f56a022a12a2e10d527f3d0c6c64d815651a32cd6741ef488dadd
|
7
|
+
data.tar.gz: 8ca0ffb36321df79bb6fd018d2e45d7a6c600371d852ef70ffad644eebcb7273dfb011f34596c3492cd9a1c42b13516ff8b8341df6d2d996e0d660613d3c809a
|
data/.coveralls.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
service_name: travis-ci
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
data/HISTORY.md
CHANGED
@@ -1,3 +1,9 @@
|
|
1
|
+
# 0.2.0
|
2
|
+
## Breaking changes
|
3
|
+
|
4
|
+
LibSVMLoader has been modified to return the samples and labels of a dataset as Ruby Arrays.
|
5
|
+
Thus, LibSVMLoader no longer requires NMatrix.
|
6
|
+
|
1
7
|
# 0.1.3
|
2
8
|
- Changed the visibility of protected methods to private.
|
3
9
|
- Fixed the description in the gemspec file.
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# LibSVMLoader
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/yoshoku/LibSVMLoader.svg?branch=master)](https://travis-ci.org/yoshoku/LibSVMLoader)
|
4
|
+
[![Coverage Status](https://coveralls.io/repos/github/yoshoku/LibSVMLoader/badge.svg?branch=master)](https://coveralls.io/github/yoshoku/LibSVMLoader?branch=master)
|
4
5
|
[![Gem Version](https://badge.fury.io/rb/libsvmloader.svg)](https://badge.fury.io/rb/libsvmloader)
|
5
6
|
[![MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/yoshoku/LibSVMLoader/blob/master/LICENSE.txt)
|
6
7
|
|
@@ -32,10 +33,38 @@ samples, labels = LibSVMLoader.load_libsvm_file('foo.t')
|
|
32
33
|
LibSVMLoader.dump_libsvm_file(samples, labels, 'bar.t')
|
33
34
|
|
34
35
|
# for regression task
|
35
|
-
samples, target_variables = LibSVMLoader.load_libsvm_file('foo.t', label_dtype:
|
36
|
+
samples, target_variables = LibSVMLoader.load_libsvm_file('foo.t', label_dtype: 'float')
|
36
37
|
LibSVMLoader.dump_libsvm_file(samples, target_variables, 'bar.t')
|
37
38
|
```
|
38
39
|
|
40
|
+
When used with Numo::NArray:
|
41
|
+
|
42
|
+
```ruby
|
43
|
+
require 'libsvmloader'
|
44
|
+
require 'numo/narray'
|
45
|
+
|
46
|
+
samples, labels = LibSVMLoader.load_libsvm_file('foo.t')
|
47
|
+
|
48
|
+
samples_na = Numo::NArray[*samples]
|
49
|
+
labels_na = Numo::NArray[*labels]
|
50
|
+
|
51
|
+
LibSVMLoader.dump_libsvm_file(samples_na.to_a, labels_na.to_a, 'bar.t')
|
52
|
+
```
|
53
|
+
|
54
|
+
When used with NMatrix:
|
55
|
+
|
56
|
+
```ruby
|
57
|
+
require 'libsvmloader'
|
58
|
+
require 'nmatrix/nmatrix'
|
59
|
+
|
60
|
+
samples, labels = LibSVMLoader.load_libsvm_file('foo.t')
|
61
|
+
|
62
|
+
samples_nm = N[*samples]
|
63
|
+
labels_nm = N[*labels]
|
64
|
+
|
65
|
+
LibSVMLoader.dump_libsvm_file(samples_nm.to_a, labels_nm.to_a, 'bar.t')
|
66
|
+
```
|
67
|
+
|
39
68
|
## Development
|
40
69
|
|
41
70
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/libsvmloader.rb
CHANGED
@@ -1,92 +1,112 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
|
2
3
|
require 'libsvmloader/version'
|
3
|
-
require '
|
4
|
+
require 'csv'
|
4
5
|
|
5
6
|
# LibSVMLoader loads (and dumps) dataset file with the libsvm file format.
|
6
7
|
class LibSVMLoader
|
7
8
|
class << self
|
8
|
-
# Load a dataset with the libsvm file format
|
9
|
+
# Load a dataset with the libsvm file format.
|
9
10
|
#
|
10
|
-
# @param filename [String]
|
11
|
+
# @param filename [String] Path to a dataset file.
|
11
12
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
12
|
-
# @param
|
13
|
-
# @param
|
14
|
-
# @param value_dtype [Symbol] The data type of the NMatrix consisting of feature vectors.
|
13
|
+
# @param label_dtype [String] Data type of labels or target values ('int', 'float', 'complex').
|
14
|
+
# @param value_dtype [String] Data type of feature vectors ('int', 'float', 'complex').
|
15
15
|
#
|
16
|
-
# @return [Array<
|
16
|
+
# @return [Array<Array>]
|
17
17
|
# Returns array containing the (n_samples x n_features) matrix for feature vectors
|
18
|
-
# and (n_samples
|
19
|
-
def load_libsvm_file(filename, zero_based: false,
|
20
|
-
ftvecs = []
|
18
|
+
# and (n_samples) vector for labels or target values.
|
19
|
+
def load_libsvm_file(filename, zero_based: false, label_dtype: 'int', value_dtype: 'float')
|
21
20
|
labels = []
|
22
|
-
|
23
|
-
|
24
|
-
|
21
|
+
ftvecs = []
|
22
|
+
maxids = []
|
23
|
+
label_class = parse_dtype(label_dtype)
|
24
|
+
value_class = parse_dtype(value_dtype)
|
25
|
+
CSV.foreach(filename, col_sep: "\s", headers: false) do |row|
|
26
|
+
label, ftvec, maxid = parse_libsvm_row(row, zero_based, label_class, value_class)
|
25
27
|
labels.push(label)
|
26
28
|
ftvecs.push(ftvec)
|
27
|
-
|
29
|
+
maxids.push(maxid)
|
28
30
|
end
|
29
|
-
[
|
30
|
-
NMatrix.new([labels.size, 1], labels, dtype: label_dtype)]
|
31
|
+
[convert_to_matrix(ftvecs, maxids.max + 1, value_class), labels]
|
31
32
|
end
|
32
33
|
|
33
34
|
# Dump the dataset with the libsvm file format.
|
34
35
|
#
|
35
|
-
# @param data [
|
36
|
-
# @param labels [
|
37
|
-
# @param filename [String]
|
36
|
+
# @param data [Array] (n_samples x n_features) matrix consisting of feature vectors.
|
37
|
+
# @param labels [Array] (n_samples) vector consisting of labels or target values.
|
38
|
+
# @param filename [String] Path to the output libsvm file.
|
38
39
|
# @param zero_based [Boolean] Whether the column index starts from 0 (true) or 1 (false).
|
39
40
|
def dump_libsvm_file(data, labels, filename, zero_based: false)
|
40
|
-
n_samples = [data.
|
41
|
-
|
42
|
-
|
41
|
+
n_samples = [data.size, labels.size].min
|
42
|
+
label_format = detect_format(labels.first)
|
43
|
+
value_format = detect_format(data.flatten.first)
|
43
44
|
File.open(filename, 'w') do |file|
|
44
|
-
n_samples.times
|
45
|
-
file.puts(dump_libsvm_line(labels[n], data.row(n),
|
46
|
-
label_type, value_type, zero_based))
|
47
|
-
end
|
45
|
+
n_samples.times { |n| file.puts(dump_libsvm_line(labels[n], data[n], label_format, value_format, zero_based)) }
|
48
46
|
end
|
49
47
|
end
|
50
48
|
|
51
49
|
private
|
52
50
|
|
53
|
-
def
|
54
|
-
|
55
|
-
|
56
|
-
ftvec = tokens.map do |el|
|
51
|
+
def parse_libsvm_row(row, zero_based, label_type, value_type)
|
52
|
+
label = convert_type(row.shift, label_type)
|
53
|
+
ftvec = row.map do |el|
|
57
54
|
idx, val = el.split(':')
|
58
|
-
idx
|
59
|
-
[idx, val.to_f]
|
55
|
+
[idx.to_i - (zero_based == false ? 1 : 0), convert_type(val, value_type)]
|
60
56
|
end
|
61
|
-
max_idx = ftvec.map { |
|
62
|
-
max_idx ||= 0
|
57
|
+
max_idx = ftvec.map { |idx, _val| idx }.max || 0
|
63
58
|
[label, ftvec, max_idx]
|
64
59
|
end
|
65
60
|
|
66
|
-
def
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
61
|
+
def parse_dtype(dtype)
|
62
|
+
case dtype.to_s
|
63
|
+
when /^(int)/i
|
64
|
+
:int
|
65
|
+
when /^(float)/i
|
66
|
+
:float
|
67
|
+
when /^(complex)/i
|
68
|
+
:complex
|
69
|
+
else
|
70
|
+
:string
|
74
71
|
end
|
75
|
-
mat
|
76
72
|
end
|
77
73
|
|
78
|
-
def
|
74
|
+
def convert_type(value, dtype)
|
75
|
+
case dtype
|
76
|
+
when :int
|
77
|
+
value.to_i
|
78
|
+
when :float
|
79
|
+
value.to_f
|
80
|
+
when :complex
|
81
|
+
value.to_c
|
82
|
+
else
|
83
|
+
value
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
def convert_to_matrix(data, n_features, value_type)
|
88
|
+
z = convert_type(0, value_type)
|
89
|
+
data.map do |ft|
|
90
|
+
vec = Array.new(n_features) { z }
|
91
|
+
ft.each { |idx, val| vec[idx] = val }
|
92
|
+
vec
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
def detect_format(data)
|
79
97
|
type = '%s'
|
80
|
-
type = '%d' if
|
81
|
-
type = '%.10g' if
|
98
|
+
type = '%d' if data.is_a?(Integer)
|
99
|
+
type = '%.10g' if data.is_a?(Float)
|
82
100
|
type
|
83
101
|
end
|
84
102
|
|
85
|
-
def dump_libsvm_line(label, ftvec,
|
86
|
-
line = format(
|
87
|
-
ftvec.
|
88
|
-
|
89
|
-
|
103
|
+
def dump_libsvm_line(label, ftvec, label_format, value_format, zero_based)
|
104
|
+
line = format(label_format, label)
|
105
|
+
ftvec.each_with_index do |val, n|
|
106
|
+
unless val.zero?
|
107
|
+
idx = n + (zero_based == false ? 1 : 0)
|
108
|
+
line += format(" %d:#{value_format}", idx, val)
|
109
|
+
end
|
90
110
|
end
|
91
111
|
line
|
92
112
|
end
|
data/lib/libsvmloader/version.rb
CHANGED
data/libsvmloader.gemspec
CHANGED
@@ -14,7 +14,7 @@ LibSVMLoader loads (and dumps) dataset file with the libsvm file format.
|
|
14
14
|
MSG
|
15
15
|
spec.description = <<MSG
|
16
16
|
LibSVMLoader is a class that loads (and dumps) dataset file with the libsvm file format.
|
17
|
-
The
|
17
|
+
The feature vectors and labels of dataset are represented by Ruby Array.
|
18
18
|
MSG
|
19
19
|
spec.homepage = 'https://github.com/yoshoku/libsvmloader'
|
20
20
|
spec.license = 'MIT'
|
@@ -28,9 +28,8 @@ MSG
|
|
28
28
|
|
29
29
|
spec.required_ruby_version = '>= 2.1'
|
30
30
|
|
31
|
-
spec.add_runtime_dependency 'nmatrix', '~> 0.2.3'
|
32
|
-
|
33
31
|
spec.add_development_dependency 'bundler', '~> 1.15'
|
32
|
+
spec.add_development_dependency 'coveralls', '~> 0.8'
|
34
33
|
spec.add_development_dependency 'rake', '~> 10.0'
|
35
34
|
spec.add_development_dependency 'rspec', '~> 3.0'
|
36
35
|
end
|
metadata
CHANGED
@@ -1,43 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: libsvmloader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- yoshoku
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version:
|
20
|
-
type: :
|
19
|
+
version: '1.15'
|
20
|
+
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version:
|
26
|
+
version: '1.15'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: coveralls
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
33
|
+
version: '0.8'
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
40
|
+
version: '0.8'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -68,7 +68,7 @@ dependencies:
|
|
68
68
|
version: '3.0'
|
69
69
|
description: |
|
70
70
|
LibSVMLoader is a class that loads (and dumps) dataset file with the libsvm file format.
|
71
|
-
The
|
71
|
+
The feature vectors and labels of dataset are represented by Ruby Array.
|
72
72
|
email:
|
73
73
|
- yoshoku@outlook.com
|
74
74
|
executables:
|
@@ -76,6 +76,7 @@ executables:
|
|
76
76
|
extensions: []
|
77
77
|
extra_rdoc_files: []
|
78
78
|
files:
|
79
|
+
- ".coveralls.yml"
|
79
80
|
- ".gitignore"
|
80
81
|
- ".rspec"
|
81
82
|
- ".rubocop.yml"
|
@@ -112,7 +113,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
112
113
|
version: '0'
|
113
114
|
requirements: []
|
114
115
|
rubyforge_project:
|
115
|
-
rubygems_version: 2.
|
116
|
+
rubygems_version: 2.7.6
|
116
117
|
signing_key:
|
117
118
|
specification_version: 4
|
118
119
|
summary: LibSVMLoader loads (and dumps) dataset file with the libsvm file format.
|