red-datasets 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/doc/text/news.md +31 -0
- data/lib/datasets.rb +3 -0
- data/lib/datasets/adult.rb +83 -0
- data/lib/datasets/dictionary.rb +59 -0
- data/lib/datasets/downloader.rb +35 -62
- data/lib/datasets/fashion-mnist.rb +12 -0
- data/lib/datasets/iris.rb +1 -1
- data/lib/datasets/mnist.rb +11 -6
- data/lib/datasets/penn-treebank.rb +2 -9
- data/lib/datasets/table.rb +17 -1
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets/wine.rb +64 -0
- data/test/helper.rb +1 -0
- data/test/test-adult.rb +126 -0
- data/test/test-dictionary.rb +43 -0
- data/test/test-fashion-mnist.rb +137 -0
- data/test/test-mnist.rb +95 -70
- data/test/test-penn-treebank.rb +6 -6
- data/test/test-table.rb +22 -2
- data/test/test-wine.rb +58 -0
- metadata +15 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 222271b814e3a5ce23b5e0dd1d2578bffb84afdab10110b0869985c6056bfd3b
|
4
|
+
data.tar.gz: ac30931b3317ab04afd394b28a45a9206c784d78b3bcaf98fc3a2a48227c7930
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8a94a3d66baaed4948904e97dc53100d73ae96c528c09b02252caabd05b8545587abf6fbcba3a578725812327a9a2c8827bbb7e283ccd3d7e66753bf30035e2e
|
7
|
+
data.tar.gz: 2ab44b5aa3ee5da0ac8e8307546c71942938de4497bfec05fc929715a4e5ef6df1cb091bce0d5f12978582d2c9fa7eaffff9edd54be0d845627dccfce42a63dd
|
data/README.md
CHANGED
@@ -30,7 +30,7 @@ iris.each do |record|
|
|
30
30
|
record.sepal_width,
|
31
31
|
record.petal_length,
|
32
32
|
record.petal_width,
|
33
|
-
record.class,
|
33
|
+
record.label,
|
34
34
|
]
|
35
35
|
end
|
36
36
|
# => [5.1, 3.5, 1.4, 0.2, "Iris-setosa"]
|
@@ -48,7 +48,7 @@ p iris_hash[:petal_length]
|
|
48
48
|
# => [1.4, 1.4, .. , 4.7, ..
|
49
49
|
p iris_hash[:petal_width]
|
50
50
|
# => [0.2, 0.2, .. , 1.4, ..
|
51
|
-
p iris_hash[:class]
|
51
|
+
p iris_hash[:label]
|
52
52
|
# => ["Iris-setosa", "Iris-setosa", .. , "Iris-versicolor", ..
|
53
53
|
|
54
54
|
|
@@ -60,7 +60,7 @@ p iris_table.fetch_values(:sepal_length, :sepal_width, :petal_length, :petal_wid
|
|
60
60
|
[7.0, 3.2, 4.7, 1.4],
|
61
61
|
:
|
62
62
|
|
63
|
-
p iris_table[:class]
|
63
|
+
p iris_table[:label]
|
64
64
|
# => ["Iris-setosa", "Iris-setosa", .. , "Iris-versicolor", ..
|
65
65
|
```
|
66
66
|
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,36 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.0.7 - 2018-11-21
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `Datasets::Table#dictionary_encode`: Added.
|
8
|
+
[GitHub#22]
|
9
|
+
|
10
|
+
* `Datasets::Table#label_encode`: Added.
|
11
|
+
|
12
|
+
* `Datasets::Dictionary`: Added.
|
13
|
+
|
14
|
+
* `Datasets::Wine`: Added.
|
15
|
+
[GitHub#26][Patch by Ryuta Suzuki]
|
16
|
+
|
17
|
+
* `Datasets::FashionMNIST`: Added.
|
18
|
+
[GitHub#27][Patch by chimame]
|
19
|
+
|
20
|
+
* `Datasets::Iris::Record#label`: Renamed from `#class`. This is an
|
21
|
+
incompatible change.
|
22
|
+
|
23
|
+
* `Datasets::Adult`: Added.
|
24
|
+
[GitHub#30][Patch by Yasuo Honda]
|
25
|
+
|
26
|
+
### Thanks
|
27
|
+
|
28
|
+
* Ryuta Suzuki
|
29
|
+
|
30
|
+
* chimame
|
31
|
+
|
32
|
+
* Yasuo Honda
|
33
|
+
|
3
34
|
## 0.0.6 - 2018-07-25
|
4
35
|
|
5
36
|
### Improvements
|
data/lib/datasets.rb
CHANGED
@@ -1,7 +1,10 @@
|
|
1
1
|
require "datasets/version"
|
2
2
|
|
3
|
+
require "datasets/adult"
|
3
4
|
require "datasets/cifar"
|
5
|
+
require "datasets/fashion-mnist"
|
4
6
|
require "datasets/iris"
|
5
7
|
require "datasets/mnist"
|
6
8
|
require "datasets/penn-treebank"
|
7
9
|
require "datasets/wikipedia"
|
10
|
+
require "datasets/wine"
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require "csv"
|
2
|
+
|
3
|
+
require_relative "dataset"
|
4
|
+
|
5
|
+
module Datasets
  # The "Adult" dataset published by the UCI Machine Learning Repository.
  #
  # Depending on +type+, records are read from the repository's
  # +adult.data+ (:train) or +adult.test+ (:test) file. The file is
  # downloaded into the dataset cache directory on first use and parsed as
  # CSV afterwards.
  #
  # NOTE(review): +cache_dir_path+ and +download+ are not defined in this
  # class; they are presumably provided by Dataset.
  class Adult < Dataset
    # One row of the data file. Fields are listed in column order.
    Record = Struct.new(
      :age,
      :work_class,
      :final_weight,
      :education,
      :n_education_years,
      :marital_status,
      :occupation,
      :relationship,
      :race,
      :sex,
      :capital_gain,
      :capital_loss,
      :hours_per_week,
      :native_country,
      :label
    )

    # @param type [Symbol] which split to read, :train or :test.
    # @raise [ArgumentError] if +type+ is neither :train nor :test.
    def initialize(type: :train)
      unless [:train, :test].include?(type)
        raise ArgumentError, 'Please set type :train or :test'
      end

      super()
      @type = type
      @metadata.id = "adult-#{@type}"
      @metadata.name = "Adult: #{@type}"
      @metadata.url = "http://archive.ics.uci.edu/ml/datasets/adult"
      # Stored as a lambda so that adult.names is only fetched when the
      # description is actually requested.
      @metadata.description = lambda do
        read_names
      end
    end

    # Yields every row of the selected split as a Record.
    #
    # @yieldparam record [Record]
    # @return [Enumerator] when called without a block.
    def each
      return to_enum(__method__) unless block_given?

      open_data do |csv|
        csv.each do |row|
          # Rows without a first field (e.g. blank lines) carry no record.
          next if row[0].nil?
          yield(Record.new(*row))
        end
      end
    end

    private

    # Downloads the data file for the current split when it is not cached
    # yet, then opens it as CSV and yields the CSV object.
    def open_data
      ext =
        case @type
        when :train
          "data"
        when :test
          "test"
        end
      data_path = cache_dir_path + "adult-#{ext}.csv"
      unless data_path.exist?
        data_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.#{ext}"
        download(data_path, data_url)
      end
      # The options must be real keyword arguments: since Ruby 3.0 a
      # positional Hash is no longer converted to keywords and would be
      # taken as CSV.open's +mode+ argument instead.
      CSV.open(data_path,
               # Numeric conversion is tried first; the lambda then strips
               # the padding from fields that are still Strings.
               converters: [:numeric, lambda {|f| f.strip}],
               # Skip lines that start with "|".
               skip_lines: /\A\|/) do |csv|
        yield(csv)
      end
    end

    # Returns the content of adult.names, downloading it into the cache
    # directory first when it is missing.
    def read_names
      names_path = cache_dir_path + "adult.names"
      unless names_path.exist?
        names_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.names"
        download(names_path, names_url)
      end
      names_path.read
    end
  end
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Datasets
  # Two-way lookup table between values and integer IDs.
  #
  # IDs are handed out in order of first appearance, starting at 0, so a
  # value that occurs several times in the input keeps the ID it received
  # the first time. Enumerating a dictionary yields [id, value] pairs.
  class Dictionary
    include Enumerable

    # @param values [#each] the values to index; duplicates are allowed.
    def initialize(values)
      build_dictionary(values)
    end

    # @return [Integer, nil] the ID of +value+, or nil when it is unknown.
    def id(value)
      @value_to_id[value]
    end

    # @return [Object, nil] the value stored for +id+, or nil when unknown.
    def value(id)
      @id_to_value[id]
    end

    # @return [Array<Integer>] every ID in assignment order.
    def ids
      @id_to_value.keys
    end

    # @return [Array] every distinct value in assignment order.
    def values
      @id_to_value.values
    end

    # Iterates over the ID-to-value mapping.
    def each(&block)
      @id_to_value.each(&block)
    end

    # @return [Integer] the number of distinct values.
    def size
      @id_to_value.size
    end
    alias length size

    # Maps each value to its ID; unknown values become nil.
    def encode(values)
      values.map { |item| id(item) }
    end

    # Maps each ID back to its value; unknown IDs become nil.
    def decode(ids)
      ids.map { |item_id| value(item_id) }
    end

    private

    # Fills both lookup hashes. The next free ID always equals the number
    # of distinct values registered so far.
    def build_dictionary(values)
      @id_to_value = {}
      @value_to_id = {}
      values.each do |value|
        next if @value_to_id.key?(value)

        next_id = @value_to_id.size
        @id_to_value[next_id] = value
        @value_to_id[value] = next_id
      end
    end
  end
end
|
data/lib/datasets/downloader.rb
CHANGED
@@ -3,7 +3,7 @@ begin
|
|
3
3
|
require "io/console"
|
4
4
|
rescue LoadError
|
5
5
|
end
|
6
|
-
require "
|
6
|
+
require "net/http"
|
7
7
|
require "pathname"
|
8
8
|
|
9
9
|
module Datasets
|
@@ -15,84 +15,57 @@ module Datasets
|
|
15
15
|
url = URI.parse(url)
|
16
16
|
end
|
17
17
|
@url = url
|
18
|
-
@url.
|
18
|
+
unless @url.is_a?(URI::HTTP)
|
19
|
+
raise ArgumentError, "download URL must be HTTP or HTTPS: <#{@url}>"
|
20
|
+
end
|
19
21
|
end
|
20
22
|
|
21
23
|
# Downloads @url into +output_path+.
#
# The body is streamed into "<output_path>.partial" and only renamed to
# +output_path+ after the transfer has finished. When a partial file is
# already present, a Range request asks the server for the missing bytes;
# the partial file is appended to only if the server answers
# "206 Partial Content", otherwise it is rewritten from scratch.
#
# @param output_path [Pathname] destination file; its parent directory is
#   created when missing.
# @raise [Net::HTTPExceptions] (via Net::HTTPResponse#value) when the
#   server does not answer with a 2xx status. Nothing is moved to
#   +output_path+ in that case.
def download(output_path)
  output_path.parent.mkpath

  headers = {"User-Agent" => "Red Datasets/#{VERSION}"}
  start = nil
  partial_output_path = Pathname.new("#{output_path}.partial")
  if partial_output_path.exist?
    start = partial_output_path.size
    headers["Range"] = "bytes=#{start}-"
  end

  Net::HTTP.start(@url.hostname,
                  @url.port,
                  :use_ssl => (@url.scheme == "https")) do |http|
    # request_uri (unlike path) keeps the query string and is "/" rather
    # than "" for a URL without a path.
    request = Net::HTTP::Get.new(@url.request_uri, headers)
    http.request(request) do |response|
      case response
      when Net::HTTPPartialContent
        # The server honored the Range header: continue the partial file.
        mode = "ab"
      when Net::HTTPSuccess
        # Full body received: any existing partial data is discarded.
        start = nil
        mode = "wb"
      else
        # Fail loudly. Silently leaving the block here would let the
        # FileUtils.mv below either fail with a misleading ENOENT or
        # install a stale partial file as if it were complete.
        response.value
      end

      base_name = @url.path.split("/").last
      size_current = 0
      # nil when the server sends no Content-Length.
      size_max = response.content_length
      if start
        size_current += start
        size_max += start if size_max
      end
      progress_reporter = ProgressReporter.new(base_name, size_max)
      partial_output_path.open(mode) do |output|
        response.read_body do |chunk|
          size_current += chunk.bytesize
          progress_reporter.report(size_current)
          output.write(chunk)
        end
      end
    end
  end
  FileUtils.mv(partial_output_path, output_path)
end
|
64
68
|
|
65
|
-
private
|
66
|
-
def copy_stream(input, partial_output_path)
|
67
|
-
if partial_output_path.exist?
|
68
|
-
# TODO: It's better that we use "206 Partial Content" response
|
69
|
-
# to detect partial response.
|
70
|
-
partial_head = partial_output_path.open("rb") do |partial_output|
|
71
|
-
partial_output.read(256)
|
72
|
-
end
|
73
|
-
input_head = input.read(partial_head.bytesize)
|
74
|
-
input.rewind
|
75
|
-
if partial_head == input_head
|
76
|
-
mode = "wb"
|
77
|
-
else
|
78
|
-
mode = "ab"
|
79
|
-
end
|
80
|
-
else
|
81
|
-
mode = "wb"
|
82
|
-
end
|
83
|
-
partial_output_path.open(mode) do |partial_output|
|
84
|
-
IO.copy_stream(input, partial_output)
|
85
|
-
end
|
86
|
-
end
|
87
|
-
|
88
|
-
module CurrentBufferReadable
|
89
|
-
attr_reader :current_buffer
|
90
|
-
def buffer_open(buffer, proxy, options)
|
91
|
-
@current_buffer = buffer
|
92
|
-
super
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
69
|
class ProgressReporter
|
97
70
|
def initialize(base_name, size_max)
|
98
71
|
@base_name = base_name
|
data/lib/datasets/iris.rb
CHANGED
data/lib/datasets/mnist.rb
CHANGED
@@ -6,6 +6,7 @@ class SetTypeError < StandardError; end
|
|
6
6
|
|
7
7
|
module Datasets
|
8
8
|
class MNIST < Dataset
|
9
|
+
BASE_URL = "http://yann.lecun.com/exdb/mnist/"
|
9
10
|
|
10
11
|
class Record < Struct.new(:data, :label)
|
11
12
|
def pixels
|
@@ -26,9 +27,9 @@ module Datasets
|
|
26
27
|
|
27
28
|
super()
|
28
29
|
|
29
|
-
@metadata.id = "
|
30
|
-
@metadata.name = "
|
31
|
-
@metadata.url =
|
30
|
+
@metadata.id = "#{dataset_name.downcase}-#{type}"
|
31
|
+
@metadata.name = "#{dataset_name}: #{type}"
|
32
|
+
@metadata.url = self.class::BASE_URL
|
32
33
|
@type = type
|
33
34
|
|
34
35
|
case type
|
@@ -44,7 +45,7 @@ module Datasets
|
|
44
45
|
|
45
46
|
image_path = cache_dir_path + target_file(:image)
|
46
47
|
label_path = cache_dir_path + target_file(:label)
|
47
|
-
base_url =
|
48
|
+
base_url = self.class::BASE_URL
|
48
49
|
|
49
50
|
unless image_path.exist?
|
50
51
|
download(image_path, base_url + target_file(:image))
|
@@ -66,7 +67,7 @@ module Datasets
|
|
66
67
|
n_bytes = n_uint32s * 4
|
67
68
|
mnist_magic_number = 2051
|
68
69
|
magic, n_images, n_rows, n_cols = f.read(n_bytes).unpack("N*")
|
69
|
-
raise
|
70
|
+
raise "This is not #{dataset_name} image file" if magic != mnist_magic_number
|
70
71
|
n_images.times do |i|
|
71
72
|
data = f.read(n_rows * n_cols)
|
72
73
|
label = labels[i]
|
@@ -100,9 +101,13 @@ module Datasets
|
|
100
101
|
n_bytes = n_uint32s * 2
|
101
102
|
mnist_magic_number = 2049
|
102
103
|
magic, n_labels = f.read(n_bytes).unpack('N2')
|
103
|
-
raise
|
104
|
+
raise "This is not #{dataset_name} label file" if magic != mnist_magic_number
|
104
105
|
f.read(n_labels).unpack('C*')
|
105
106
|
end
|
106
107
|
end
|
108
|
+
|
109
|
+
def dataset_name
|
110
|
+
"MNIST"
|
111
|
+
end
|
107
112
|
end
|
108
113
|
end
|
@@ -2,7 +2,7 @@ require_relative "dataset"
|
|
2
2
|
|
3
3
|
module Datasets
|
4
4
|
class PennTreebank < Dataset
|
5
|
-
Record = Struct.new(:word
|
5
|
+
Record = Struct.new(:word)
|
6
6
|
|
7
7
|
DESCRIPTION = <<~DESC
|
8
8
|
`Penn Tree Bank <https://www.cis.upenn.edu/~treebank/>`_ is originally a
|
@@ -46,17 +46,10 @@ module Datasets
|
|
46
46
|
|
47
47
|
private
|
48
48
|
def parse_data(data_path)
|
49
|
-
index = 0
|
50
|
-
vocabulary = {}
|
51
49
|
File.open(data_path) do |f|
|
52
50
|
f.each_line do |line|
|
53
51
|
line.split.each do |word|
|
54
|
-
word
|
55
|
-
unless vocabulary.key?(word)
|
56
|
-
vocabulary[word] = index
|
57
|
-
index += 1
|
58
|
-
end
|
59
|
-
yield(Record.new(word, vocabulary[word]))
|
52
|
+
yield(Record.new(word.strip))
|
60
53
|
end
|
61
54
|
end
|
62
55
|
end
|
data/lib/datasets/table.rb
CHANGED
@@ -1,9 +1,12 @@
|
|
1
|
+
require "datasets/dictionary"
|
2
|
+
|
1
3
|
module Datasets
|
2
4
|
class Table
|
3
5
|
include Enumerable
|
4
6
|
|
5
7
|
def initialize(dataset)
|
6
8
|
@dataset = dataset
|
9
|
+
@dictionaries = {}
|
7
10
|
end
|
8
11
|
|
9
12
|
def each(&block)
|
@@ -11,7 +14,16 @@ module Datasets
|
|
11
14
|
end
|
12
15
|
|
13
16
|
def [](name)
|
14
|
-
columner_data[name
|
17
|
+
columner_data[normalize_name(name)]
|
18
|
+
end
|
19
|
+
|
20
|
+
def dictionary_encode(name)
|
21
|
+
@dictionaries[normalize_name(name)] ||= Dictionary.new(self[name])
|
22
|
+
end
|
23
|
+
|
24
|
+
def label_encode(name)
|
25
|
+
dictionary = dictionary_encode(name)
|
26
|
+
dictionary.encode(self[name])
|
15
27
|
end
|
16
28
|
|
17
29
|
def fetch_values(*keys)
|
@@ -55,5 +67,9 @@ module Datasets
|
|
55
67
|
def columner_data
|
56
68
|
@columns ||= to_h
|
57
69
|
end
|
70
|
+
|
71
|
+
def normalize_name(name)
|
72
|
+
name.to_sym
|
73
|
+
end
|
58
74
|
end
|
59
75
|
end
|
data/lib/datasets/version.rb
CHANGED
@@ -0,0 +1,64 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
require_relative 'dataset'
|
4
|
+
|
5
|
+
module Datasets
  # The "Wine" dataset published by the UCI Machine Learning Repository.
  #
  # Both wine.data and wine.names are downloaded into the dataset cache
  # directory the first time they are needed.
  #
  # NOTE(review): +cache_dir_path+ and +download+ are not defined in this
  # class; they are presumably provided by Dataset.
  class Wine < Dataset
    # One row of wine.data. Fields are listed in column order; the label
    # is the first column.
    Record = Struct.new(
      :label,
      :alcohol,
      :malic_acid,
      :ash,
      :alcalinity_of_ash,
      :n_magnesiums,
      :total_phenols,
      :total_flavonoids,
      :total_nonflavanoid_phenols,
      :total_proanthocyanins,
      :color_intensity,
      :hue,
      :optical_nucleic_acid_concentration,
      :n_prolines
    )

    def initialize
      super
      @metadata.id = "wine"
      @metadata.name = "Wine"
      @metadata.url = "http://archive.ics.uci.edu/ml/datasets/wine"
      # Stored as a lambda so that wine.names is only fetched when the
      # description is actually requested.
      @metadata.description = lambda { read_names }
    end

    # Yields every row of wine.data as a Record.
    #
    # @yieldparam record [Record]
    # @return [Enumerator] when called without a block.
    def each
      return to_enum(__method__) unless block_given?

      open_data do |csv|
        csv.each do |fields|
          # Rows without a first field (e.g. blank lines) carry no record.
          next if fields[0].nil?

          yield Record.new(*fields)
        end
      end
    end

    private

    # Returns the content of wine.names, downloading it first when it is
    # not cached yet.
    def read_names
      path = cache_dir_path + "wine.names"
      unless path.exist?
        download(path,
                 "http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.names")
      end
      path.read
    end

    # Opens wine.data as CSV with numeric conversion enabled and yields the
    # CSV object. The file is downloaded first when it is not cached yet.
    def open_data
      path = cache_dir_path + "wine.data"
      unless path.exist?
        download(path,
                 "http://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data")
      end
      CSV.open(path, converters: [:numeric]) { |csv| yield(csv) }
    end
  end
end
|
data/test/helper.rb
CHANGED
data/test/test-adult.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
# Tests for Datasets::Adult.
#
# For each split the test checks the number of records plus the first and
# the last record. The expected hashes mirror rows of the upstream files,
# including the trailing "." that the labels of the test split carry.
class AdultTest < Test::Unit::TestCase
  sub_test_case("train") do
    def setup
      @dataset = Datasets::Adult.new(type: :train)
    end

    test("#each") do
      records = @dataset.each.to_a
      assert_equal([
                     32561,
                     {
                       age: 39,
                       work_class: "State-gov",
                       final_weight: 77516,
                       education: "Bachelors",
                       n_education_years: 13,
                       marital_status: "Never-married",
                       occupation: "Adm-clerical",
                       relationship: "Not-in-family",
                       race: "White",
                       sex: "Male",
                       capital_gain: 2174,
                       capital_loss: 0,
                       hours_per_week: 40,
                       native_country: "United-States",
                       label: "<=50K",
                     },
                     {
                       age: 52,
                       work_class: "Self-emp-inc",
                       final_weight: 287927,
                       education: "HS-grad",
                       n_education_years: 9,
                       marital_status: "Married-civ-spouse",
                       occupation: "Exec-managerial",
                       relationship: "Wife",
                       race: "White",
                       sex: "Female",
                       capital_gain: 15024,
                       capital_loss: 0,
                       hours_per_week: 40,
                       native_country: "United-States",
                       label: ">50K",
                     },
                   ],
                   [
                     records.size,
                     records[0].to_h,
                     records[-1].to_h,
                   ])
    end
  end

  sub_test_case("test") do
    def setup
      @dataset = Datasets::Adult.new(type: :test)
    end

    test("#each") do
      records = @dataset.each.to_a
      assert_equal([
                     16281,
                     {
                       age: 25,
                       work_class: "Private",
                       final_weight: 226802,
                       education: "11th",
                       n_education_years: 7,
                       marital_status: "Never-married",
                       occupation: "Machine-op-inspct",
                       relationship: "Own-child",
                       race: "Black",
                       sex: "Male",
                       capital_gain: 0,
                       capital_loss: 0,
                       hours_per_week: 40,
                       native_country: "United-States",
                       label: "<=50K.",
                     },
                     {
                       age: 35,
                       work_class: "Self-emp-inc",
                       final_weight: 182148,
                       education: "Bachelors",
                       n_education_years: 13,
                       marital_status: "Married-civ-spouse",
                       occupation: "Exec-managerial",
                       relationship: "Husband",
                       race: "White",
                       sex: "Male",
                       capital_gain: 0,
                       capital_loss: 0,
                       hours_per_week: 60,
                       native_country: "United-States",
                       label: ">50K.",
                     },
                   ],
                   [
                     records.size,
                     records[0].to_h,
                     records[-1].to_h,
                   ])
    end
  end

  sub_test_case("#metadata") do
    def setup
      @dataset = Datasets::Adult.new(type: :train)
    end

    test("#description") do
      description = @dataset.metadata.description
      assert do
        description.start_with?("| This data was extracted from the census bureau database found at")
      end
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# Tests for Datasets::Dictionary, using the dictionary built from the
# :word column of the Penn Treebank test split.
class DictionaryTest < Test::Unit::TestCase
  def setup
    table = Datasets::PennTreebank.new(type: :test).to_table
    @dictionary = table.dictionary_encode(:word)
  end

  test("#id") do
    assert_equal(95, @dictionary.id("<unk>"))
  end

  test("#value") do
    assert_equal("<unk>", @dictionary.value(95))
  end

  test("#ids") do
    first_ids = @dictionary.ids.first(5)
    assert_equal([0, 1, 2, 3, 4], first_ids)
  end

  test("#values") do
    first_values = @dictionary.values.first(5)
    assert_equal(["no", "it", "was", "n't", "black"], first_values)
  end

  test("#each") do
    expected = [
      [0, "no"],
      [1, "it"],
      [2, "was"],
      [3, "n't"],
      [4, "black"],
    ]
    assert_equal(expected, @dictionary.each.first(5).to_a)
  end

  test("#size") do
    assert_equal(6048, @dictionary.size)
  end

  test("#length") do
    # #length is an alias of #size.
    assert_equal(@dictionary.size, @dictionary.length)
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
# Tests for Datasets::FashionMNIST.
#
# Each split is checked through its record count plus a fingerprint of the
# first and the last record: label, number of pixels and two 10-pixel
# slices starting at offsets 400 and 500.
class FashionMNISTTest < Test::Unit::TestCase
  # Condenses a record into [label, pixel count, pixels[400, 10],
  # pixels[500, 10]]. Shared by the sub test cases below.
  def fingerprint(record)
    pixels = record.pixels
    [
      record.label,
      pixels.size,
      pixels[400, 10],
      pixels[500, 10],
    ]
  end

  sub_test_case("Normal") do
    sub_test_case("train") do
      def setup
        @dataset = Datasets::FashionMNIST.new(type: :train)
      end

      test("#each") do
        records = @dataset.each.to_a
        expected_first = [
          9,
          784,
          [0, 0, 0, 0, 237, 226, 217, 223, 222, 219],
          [220, 232, 246, 0, 3, 202, 228, 224, 221, 211],
        ]
        expected_last = [
          5,
          784,
          [129, 153, 34, 0, 3, 3, 0, 3, 0, 24],
          [180, 177, 177, 47, 101, 235, 194, 223, 232, 255],
        ]
        assert_equal([60000, expected_first, expected_last],
                     [
                       records.size,
                       fingerprint(records[0]),
                       fingerprint(records[-1]),
                     ])
      end

      test("#to_table") do
        pixels = @dataset.to_table[:pixels]
        assert_equal([
                       [0, 0, 0, 0, 237, 226, 217, 223, 222, 219],
                       [129, 153, 34, 0, 3, 3, 0, 3, 0, 24],
                     ],
                     [
                       pixels[0][400, 10],
                       pixels[-1][400, 10],
                     ])
      end

      sub_test_case("#metadata") do
        test("#id") do
          assert_equal("fashion-mnist-train", @dataset.metadata.id)
        end

        test("#name") do
          assert_equal("Fashion-MNIST: train", @dataset.metadata.name)
        end
      end
    end

    sub_test_case("test") do
      def setup
        @dataset = Datasets::FashionMNIST.new(type: :test)
      end

      test("#each") do
        records = @dataset.each.to_a
        expected_first = [
          9,
          784,
          [1, 0, 0, 0, 98, 136, 110, 109, 110, 162],
          [172, 161, 189, 62, 0, 68, 94, 90, 111, 114],
        ]
        expected_last = [
          5,
          784,
          [45, 45, 69, 128, 100, 120, 132, 123, 135, 171],
          [63, 74, 72, 0, 1, 0, 0, 0, 4, 85],
        ]
        assert_equal([10000, expected_first, expected_last],
                     [
                       records.size,
                       fingerprint(records[0]),
                       fingerprint(records[-1]),
                     ])
      end

      test("#to_table") do
        pixels = @dataset.to_table[:pixels]
        assert_equal([
                       [1, 0, 0, 0, 98, 136, 110, 109, 110, 162],
                       [45, 45, 69, 128, 100, 120, 132, 123, 135, 171],
                     ],
                     [
                       pixels[0][400, 10],
                       pixels[-1][400, 10],
                     ])
      end

      sub_test_case("#metadata") do
        test("#id") do
          assert_equal("fashion-mnist-test", @dataset.metadata.id)
        end

        test("#name") do
          assert_equal("Fashion-MNIST: test", @dataset.metadata.name)
        end
      end
    end
  end

  sub_test_case("Abnormal") do
    test("invalid type") do
      invalid_type = :invalid
      expected_error =
        ArgumentError.new("Please set type :train or :test: #{invalid_type.inspect}")
      assert_raise(expected_error) do
        Datasets::FashionMNIST.new(type: invalid_type)
      end
    end
  end
end
|
data/test/test-mnist.rb
CHANGED
@@ -1,100 +1,125 @@
|
|
1
1
|
class MNISTTest < Test::Unit::TestCase
|
2
|
-
include Helper::Sandbox
|
3
|
-
|
4
2
|
sub_test_case("Normal") do
|
5
|
-
def setup_data
|
6
|
-
setup_sandbox
|
7
|
-
|
8
|
-
def @dataset.cache_dir_path
|
9
|
-
@cache_dir_path
|
10
|
-
end
|
11
|
-
|
12
|
-
def @dataset.cache_dir_path=(path)
|
13
|
-
@cache_dir_path = path
|
14
|
-
end
|
15
|
-
@dataset.cache_dir_path = @tmp_dir
|
16
|
-
|
17
|
-
def @dataset.download(output_path, url)
|
18
|
-
image_magic_number = 2051
|
19
|
-
label_magic_number = 2049
|
20
|
-
n_image, image_size_x, image_size_y, label = 10, 28, 28, 1
|
21
|
-
|
22
|
-
Zlib::GzipWriter.open(output_path) do |gz|
|
23
|
-
if output_path.basename.to_s.include?("-images-")
|
24
|
-
image_data = ([image_magic_number, n_image]).pack('N2') +
|
25
|
-
([image_size_x,image_size_y]).pack('N2') +
|
26
|
-
([0] * image_size_x * image_size_y).pack("C*") * n_image
|
27
|
-
gz.puts(image_data)
|
28
|
-
else
|
29
|
-
label_data = ([label_magic_number, n_image]).pack('N2') +
|
30
|
-
([label] * n_image).pack("C*")
|
31
|
-
gz.puts(label_data)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
def teardown
|
38
|
-
teardown_sandbox
|
39
|
-
end
|
40
|
-
|
41
3
|
sub_test_case("train") do
|
42
4
|
def setup
|
43
5
|
@dataset = Datasets::MNIST.new(type: :train)
|
44
|
-
setup_data()
|
45
6
|
end
|
46
7
|
|
47
8
|
test("#each") do
|
48
|
-
|
49
|
-
{
|
50
|
-
:label => record.label,
|
51
|
-
:pixels => record.pixels
|
52
|
-
}
|
53
|
-
end
|
54
|
-
|
9
|
+
records = @dataset.each.to_a
|
55
10
|
assert_equal([
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
11
|
+
60000,
|
12
|
+
[
|
13
|
+
5,
|
14
|
+
784,
|
15
|
+
[0, 0, 0, 49, 238, 253, 253, 253, 253, 253],
|
16
|
+
[0, 0, 0, 0, 0, 81, 240, 253, 253, 119],
|
17
|
+
],
|
18
|
+
[8,
|
19
|
+
784,
|
20
|
+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 62],
|
21
|
+
[0, 0, 190, 196, 14, 2, 97, 254, 252, 146],
|
22
|
+
],
|
23
|
+
],
|
24
|
+
[
|
25
|
+
records.size,
|
26
|
+
[
|
27
|
+
records[0].label,
|
28
|
+
records[0].pixels.size,
|
29
|
+
records[0].pixels[200, 10],
|
30
|
+
records[0].pixels[400, 10],
|
31
|
+
],
|
32
|
+
[
|
33
|
+
records[-1].label,
|
34
|
+
records[-1].pixels.size,
|
35
|
+
records[-1].pixels[200, 10],
|
36
|
+
records[-1].pixels[400, 10],
|
37
|
+
],
|
38
|
+
])
|
62
39
|
end
|
63
40
|
|
64
41
|
test("#to_table") do
|
65
42
|
table_data = @dataset.to_table
|
66
|
-
assert_equal([
|
67
|
-
|
43
|
+
assert_equal([
|
44
|
+
[0, 0, 0, 49, 238, 253, 253, 253, 253, 253],
|
45
|
+
[0, 0, 0, 0, 0, 0, 0, 0, 0, 62],
|
46
|
+
],
|
47
|
+
[
|
48
|
+
table_data[:pixels][0][200, 10],
|
49
|
+
table_data[:pixels][-1][200, 10],
|
50
|
+
])
|
51
|
+
end
|
52
|
+
|
53
|
+
sub_test_case("#metadata") do
|
54
|
+
test("#id") do
|
55
|
+
assert_equal("mnist-train", @dataset.metadata.id)
|
56
|
+
end
|
57
|
+
|
58
|
+
test("#name") do
|
59
|
+
assert_equal("MNIST: train", @dataset.metadata.name)
|
60
|
+
end
|
68
61
|
end
|
69
62
|
end
|
70
63
|
|
71
64
|
sub_test_case("test") do
|
72
65
|
def setup
|
73
66
|
@dataset = Datasets::MNIST.new(type: :test)
|
74
|
-
setup_data()
|
75
67
|
end
|
76
68
|
|
77
69
|
test("#each") do
|
78
|
-
|
79
|
-
{
|
80
|
-
:label => record.label,
|
81
|
-
:pixels => record.pixels
|
82
|
-
}
|
83
|
-
end
|
84
|
-
|
70
|
+
records = @dataset.each.to_a
|
85
71
|
assert_equal([
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
72
|
+
10000,
|
73
|
+
[
|
74
|
+
7,
|
75
|
+
784,
|
76
|
+
[0, 0, 84, 185, 159, 151, 60, 36, 0, 0],
|
77
|
+
[0, 0, 0, 0, 0, 0, 0, 0, 59, 249],
|
78
|
+
],
|
79
|
+
[
|
80
|
+
6,
|
81
|
+
784,
|
82
|
+
[0, 0, 0, 0, 0, 15, 60, 60, 168, 253],
|
83
|
+
[253, 253, 132, 64, 0, 0, 18, 43, 157, 171],
|
84
|
+
],
|
85
|
+
],
|
86
|
+
[
|
87
|
+
records.size,
|
88
|
+
[
|
89
|
+
records[0].label,
|
90
|
+
records[0].pixels.size,
|
91
|
+
records[0].pixels[200, 10],
|
92
|
+
records[0].pixels[400, 10],
|
93
|
+
],
|
94
|
+
[
|
95
|
+
records[-1].label,
|
96
|
+
records[-1].pixels.size,
|
97
|
+
records[-1].pixels[200, 10],
|
98
|
+
records[-1].pixels[400, 10],
|
99
|
+
],
|
100
|
+
])
|
92
101
|
end
|
93
102
|
|
94
103
|
test("#to_table") do
|
95
104
|
table_data = @dataset.to_table
|
96
|
-
assert_equal([
|
97
|
-
|
105
|
+
assert_equal([
|
106
|
+
[0, 0, 84, 185, 159, 151, 60, 36, 0, 0],
|
107
|
+
[0, 0, 0, 0, 0, 15, 60, 60, 168, 253],
|
108
|
+
],
|
109
|
+
[
|
110
|
+
table_data[:pixels][0][200, 10],
|
111
|
+
table_data[:pixels][-1][200, 10],
|
112
|
+
])
|
113
|
+
end
|
114
|
+
|
115
|
+
sub_test_case("#metadata") do
|
116
|
+
test("#id") do
|
117
|
+
assert_equal("mnist-test", @dataset.metadata.id)
|
118
|
+
end
|
119
|
+
|
120
|
+
test("#name") do
|
121
|
+
assert_equal("MNIST: test", @dataset.metadata.name)
|
122
|
+
end
|
98
123
|
end
|
99
124
|
end
|
100
125
|
end
|
data/test/test-penn-treebank.rb
CHANGED
@@ -9,8 +9,8 @@ class PennTreebankTest < Test::Unit::TestCase
|
|
9
9
|
records = dataset.to_a
|
10
10
|
assert_equal([
|
11
11
|
887521,
|
12
|
-
record("aer"
|
13
|
-
record("<unk>"
|
12
|
+
record("aer"),
|
13
|
+
record("<unk>"),
|
14
14
|
],
|
15
15
|
[
|
16
16
|
records.size,
|
@@ -24,8 +24,8 @@ class PennTreebankTest < Test::Unit::TestCase
|
|
24
24
|
records = dataset.to_a
|
25
25
|
assert_equal([
|
26
26
|
78669,
|
27
|
-
record("no"
|
28
|
-
record("us"
|
27
|
+
record("no"),
|
28
|
+
record("us"),
|
29
29
|
],
|
30
30
|
[
|
31
31
|
records.size,
|
@@ -39,8 +39,8 @@ class PennTreebankTest < Test::Unit::TestCase
|
|
39
39
|
records = dataset.to_a
|
40
40
|
assert_equal([
|
41
41
|
70390,
|
42
|
-
record("consumers"
|
43
|
-
record("N"
|
42
|
+
record("consumers"),
|
43
|
+
record("N"),
|
44
44
|
],
|
45
45
|
[
|
46
46
|
records.size,
|
data/test/test-table.rb
CHANGED
@@ -8,6 +8,26 @@ class TableTest < Test::Unit::TestCase
|
|
8
8
|
@table[:petal_length].first(5))
|
9
9
|
end
|
10
10
|
|
11
|
+
test("#dictionary_encode") do
|
12
|
+
assert_equal([
|
13
|
+
[0, "Iris-setosa"],
|
14
|
+
[1, "Iris-versicolor"],
|
15
|
+
[2, "Iris-virginica"],
|
16
|
+
],
|
17
|
+
@table.dictionary_encode(:label).to_a)
|
18
|
+
end
|
19
|
+
|
20
|
+
test("#label_encode") do
|
21
|
+
label_encoded_labels = @table.label_encode(:label)
|
22
|
+
labels = @table[:label]
|
23
|
+
assert_equal([0, 1, 2],
|
24
|
+
[
|
25
|
+
label_encoded_labels[labels.find_index("Iris-setosa")],
|
26
|
+
label_encoded_labels[labels.find_index("Iris-versicolor")],
|
27
|
+
label_encoded_labels[labels.find_index("Iris-virginica")],
|
28
|
+
])
|
29
|
+
end
|
30
|
+
|
11
31
|
sub_test_case("#fetch_values") do
|
12
32
|
test("found") do
|
13
33
|
values = @table.fetch_values(:petal_length, :petal_width)
|
@@ -44,7 +64,7 @@ class TableTest < Test::Unit::TestCase
|
|
44
64
|
shorten_hash[name] = values.first(5)
|
45
65
|
end
|
46
66
|
assert_equal({
|
47
|
-
:class => ["Iris-setosa"] * 5,
|
67
|
+
:label => ["Iris-setosa"] * 5,
|
48
68
|
:petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
|
49
69
|
:petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
|
50
70
|
:sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
|
@@ -59,7 +79,7 @@ class TableTest < Test::Unit::TestCase
|
|
59
79
|
shorten_hash[name] = values.first(5)
|
60
80
|
end
|
61
81
|
assert_equal({
|
62
|
-
:class => ["Iris-setosa"] * 5,
|
82
|
+
:label => ["Iris-setosa"] * 5,
|
63
83
|
:petal_length => [1.4, 1.4, 1.3, 1.5, 1.4],
|
64
84
|
:petal_width => [0.2, 0.2, 0.2, 0.2, 0.2],
|
65
85
|
:sepal_length => [5.1, 4.9, 4.7, 4.6, 5.0],
|
data/test/test-wine.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# Tests for Datasets::Wine: the record count, the first and the last
# record, and the beginning of the dataset description.
class WineTest < Test::Unit::TestCase
  def setup
    @dataset = Datasets::Wine.new
  end

  test("#each") do
    records = @dataset.each.to_a
    # Keys are listed in Record field order; Hash equality does not
    # depend on key order.
    expected_first = {
      label: 1,
      alcohol: 14.23,
      malic_acid: 1.71,
      ash: 2.43,
      alcalinity_of_ash: 15.6,
      n_magnesiums: 127,
      total_phenols: 2.8,
      total_flavonoids: 3.06,
      total_nonflavanoid_phenols: 0.28,
      total_proanthocyanins: 2.29,
      color_intensity: 5.64,
      hue: 1.04,
      optical_nucleic_acid_concentration: 3.92,
      n_prolines: 1065,
    }
    expected_last = {
      label: 3,
      alcohol: 14.13,
      malic_acid: 4.1,
      ash: 2.74,
      alcalinity_of_ash: 24.5,
      n_magnesiums: 96,
      total_phenols: 2.05,
      total_flavonoids: 0.76,
      total_nonflavanoid_phenols: 0.56,
      total_proanthocyanins: 1.35,
      color_intensity: 9.2,
      hue: 0.61,
      optical_nucleic_acid_concentration: 1.6,
      n_prolines: 560,
    }
    assert_equal([178, expected_first, expected_last],
                 [
                   records.size,
                   records[0].to_h,
                   records[-1].to_h,
                 ])
  end

  sub_test_case("#metadata") do
    test("#description") do
      description = @dataset.metadata.description
      assert do
        description.start_with?("1. Title of Database: Wine recognition data")
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-
|
12
|
+
date: 2018-11-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -99,9 +99,12 @@ files:
|
|
99
99
|
- Rakefile
|
100
100
|
- doc/text/news.md
|
101
101
|
- lib/datasets.rb
|
102
|
+
- lib/datasets/adult.rb
|
102
103
|
- lib/datasets/cifar.rb
|
103
104
|
- lib/datasets/dataset.rb
|
105
|
+
- lib/datasets/dictionary.rb
|
104
106
|
- lib/datasets/downloader.rb
|
107
|
+
- lib/datasets/fashion-mnist.rb
|
105
108
|
- lib/datasets/iris.rb
|
106
109
|
- lib/datasets/metadata.rb
|
107
110
|
- lib/datasets/mnist.rb
|
@@ -109,15 +112,20 @@ files:
|
|
109
112
|
- lib/datasets/table.rb
|
110
113
|
- lib/datasets/version.rb
|
111
114
|
- lib/datasets/wikipedia.rb
|
115
|
+
- lib/datasets/wine.rb
|
112
116
|
- red-datasets.gemspec
|
113
117
|
- test/helper.rb
|
114
118
|
- test/run-test.rb
|
119
|
+
- test/test-adult.rb
|
115
120
|
- test/test-cifar.rb
|
121
|
+
- test/test-dictionary.rb
|
122
|
+
- test/test-fashion-mnist.rb
|
116
123
|
- test/test-iris.rb
|
117
124
|
- test/test-mnist.rb
|
118
125
|
- test/test-penn-treebank.rb
|
119
126
|
- test/test-table.rb
|
120
127
|
- test/test-wikipedia.rb
|
128
|
+
- test/test-wine.rb
|
121
129
|
homepage: https://github.com/red-data-tools/red-datasets
|
122
130
|
licenses:
|
123
131
|
- MIT
|
@@ -138,16 +146,20 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
138
146
|
version: '0'
|
139
147
|
requirements: []
|
140
148
|
rubyforge_project:
|
141
|
-
rubygems_version: 3.0.0.
|
149
|
+
rubygems_version: 3.0.0.beta2
|
142
150
|
signing_key:
|
143
151
|
specification_version: 4
|
144
152
|
summary: Red Datasets provides classes that provide common datasets such as iris dataset.
|
145
153
|
test_files:
|
154
|
+
- test/test-wine.rb
|
146
155
|
- test/run-test.rb
|
147
156
|
- test/test-cifar.rb
|
157
|
+
- test/test-fashion-mnist.rb
|
148
158
|
- test/test-wikipedia.rb
|
149
159
|
- test/test-iris.rb
|
150
160
|
- test/helper.rb
|
151
161
|
- test/test-mnist.rb
|
152
162
|
- test/test-table.rb
|
163
|
+
- test/test-adult.rb
|
153
164
|
- test/test-penn-treebank.rb
|
165
|
+
- test/test-dictionary.rb
|