red-datasets 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +38 -0
- data/doc/text/news.md +14 -0
- data/lib/datasets/cifar.rb +126 -0
- data/lib/datasets/dataset.rb +2 -0
- data/lib/datasets/version.rb +1 -1
- data/lib/datasets.rb +1 -0
- data/test/test-cifar.rb +209 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56c3132aec99f8dab763d68d0b731df0e5b2c97ea9f88a847021210fda94b9ac
|
4
|
+
data.tar.gz: 20e11d381718b801dac9c155385837b770c5067646ac235ade9c0f06f4d23cb8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c3ccf7b09e6064cacc47c04dd972ef4ba9318278630446ae60911412e9dc5dced5c988423b78a1c0ae68d71ede6136ebd2ee9c86f4247fd56bbf940f7997f7e
|
7
|
+
data.tar.gz: bebe49f31022dfeab1eb7626ef1eaf722d5dedf373dab89ba79e3e2afb4849ace50b5098688e4d0480f22719a758a086c288c600cdcb358099a7aa9d1e60eab1
|
data/README.md
CHANGED
@@ -37,6 +37,44 @@ iris.each do |record|
|
|
37
37
|
end
|
38
38
|
```
|
39
39
|
|
40
|
+
Here is an example of accessing the CIFAR dataset with `#each`:
|
41
|
+
|
42
|
+
**CIFAR-10**
|
43
|
+
|
44
|
+
```ruby
|
45
|
+
require "datasets"
|
46
|
+
|
47
|
+
cifar = Datasets::CIFAR.new(n_classes: 10, type: :train)
|
48
|
+
cifar.metadata
|
49
|
+
#=> #<struct Datasets::Metadata name="CIFAR-10", url="https://www.cs.toronto.edu/~kriz/cifar.html", licenses=nil, description="CIFAR-10 is 32x32 image dataset">
|
50
|
+
cifar.each do |record|
|
51
|
+
p record.pixels
|
52
|
+
# => [59, 43, 50, 68, 98, 119, 139, 145, 149, 143, .....]
|
53
|
+
p record.label
|
54
|
+
# => 6
|
55
|
+
end
|
56
|
+
|
57
|
+
```
|
58
|
+
|
59
|
+
**CIFAR-100**
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
require "datasets"
|
63
|
+
|
64
|
+
cifar = Datasets::CIFAR.new(n_classes: 100, type: :test)
|
65
|
+
cifar.metadata
|
66
|
+
#=> #<struct Datasets::Metadata name="CIFAR-100", url="https://www.cs.toronto.edu/~kriz/cifar.html", licenses=nil, description="CIFAR-100 is 32x32 image dataset">
|
67
|
+
cifar.each do |record|
|
68
|
+
p record.pixels
|
69
|
+
#=> [199, 196, 195, 195, 196, 197, 198, 198, 199, .....]
|
70
|
+
p record.coarse_label
|
71
|
+
#=> 10
|
72
|
+
p record.fine_label
|
73
|
+
#=> 49
|
74
|
+
end
|
75
|
+
|
76
|
+
```
|
77
|
+
|
40
78
|
## License
|
41
79
|
|
42
80
|
The MIT license. See `LICENSE.txt` for details.
|
data/doc/text/news.md
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
# News
|
2
2
|
|
3
|
+
## 0.0.4 - 2018-05-03
|
4
|
+
|
5
|
+
### Improvements
|
6
|
+
|
7
|
+
* `Datasets::Dataset`: Made enumerable.
|
8
|
+
|
9
|
+
* `Datasets::CIFAR`: Added the CIFAR dataset.
|
10
|
+
[GitHub#7][GitHub#8][GitHub#9][GitHub#10]
|
11
|
+
[Patch by Yusaku Hatanaka]
|
12
|
+
|
13
|
+
### Thanks
|
14
|
+
|
15
|
+
* Yusaku Hatanaka
|
16
|
+
|
3
17
|
## 0.0.3 - 2018-03-27
|
4
18
|
|
5
19
|
### Improvements
|
data/lib/datasets/cifar.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
require "rubygems/package"
|
2
|
+
require "zlib"
|
3
|
+
|
4
|
+
require_relative "dataset"
|
5
|
+
|
6
|
+
module Datasets
  # CIFAR-10 and CIFAR-100 image datasets.
  #
  # The gzipped tar archive is downloaded into the cache directory the first
  # time #each is called and is then streamed record by record.
  #
  # Every record in the binary files is read as one label byte (CIFAR-10) or
  # a coarse label byte followed by a fine label byte (CIFAR-100), and then
  # 3072 bytes of image data.
  #
  # Example:
  #
  #   cifar = Datasets::CIFAR.new(n_classes: 10, type: :train)
  #   cifar.each do |record|
  #     record.label   # => Integer
  #     record.pixels  # => Array of 3072 Integers
  #   end
  class CIFAR < Dataset
    # One CIFAR-10 sample: the raw image bytes and its class label.
    class Record10 < Struct.new(:data, :label)
      # Returns the image bytes as an Array of Integers (0..255).
      def pixels
        data.unpack("C*")
      end
    end

    # One CIFAR-100 sample: the raw image bytes with its coarse and fine
    # class labels.
    class Record100 < Struct.new(:data, :coarse_label, :fine_label)
      # Returns the image bytes as an Array of Integers (0..255).
      def pixels
        data.unpack("C*")
      end
    end

    # Number of image data bytes stored in every record. The metadata
    # description calls the images 32x32, so this is presumably three
    # one-byte channels per pixel.
    N_DATA_BYTES = 3072
    private_constant :N_DATA_BYTES

    # Archive member names to read, indexed by number of classes and then by
    # dataset type.
    TARGET_FILE_NAMES = {
      10 => {
        train: (1..5).map { |i| "cifar-10-batches-bin/data_batch_#{i}.bin" }.freeze,
        test: ["cifar-10-batches-bin/test_batch.bin"].freeze,
      }.freeze,
      100 => {
        train: ["cifar-100-binary/train.bin"].freeze,
        test: ["cifar-100-binary/test.bin"].freeze,
      }.freeze,
    }.freeze
    private_constant :TARGET_FILE_NAMES

    # n_classes:: 10 for CIFAR-10 or 100 for CIFAR-100.
    # type::      :train or :test.
    #
    # Raises RuntimeError when either argument has any other value.
    def initialize(n_classes: 10, type: :train)
      unless [10, 100].include?(n_classes)
        raise 'Please set n_classes 10 or 100'
      end
      unless [:train, :test].include?(type)
        raise 'Please set type :train or :test'
      end

      super()

      @metadata.name = "CIFAR-#{n_classes}"
      @metadata.url = "https://www.cs.toronto.edu/~kriz/cifar.html"
      @metadata.description = "CIFAR-#{n_classes} is 32x32 image dataset"

      @n_classes = n_classes
      @type = type
    end

    # Yields every record of the selected split as a Record10 (CIFAR-10) or
    # a Record100 (CIFAR-100). The archive is downloaded first when it is not
    # in the cache directory yet.
    #
    # Returns an Enumerator when no block is given.
    def each(&block)
      return to_enum(__method__) unless block_given?

      data_path = cache_dir_path + "cifar-#{@n_classes}.tar.gz"
      unless data_path.exist?
        data_url = "https://www.cs.toronto.edu/~kriz/cifar-#{@n_classes}-binary.tar.gz"
        download(data_path, data_url)
      end

      parse_data(data_path, &block)
    end

    private

    # Opens the archive at +data_path+ and parses each target file in the
    # order given by #target_file_names. Archive members that are not listed
    # there are never read.
    def parse_data(data_path, &block)
      open_tar(data_path) do |tar|
        target_file_names.each do |target_file_name|
          tar.seek(target_file_name) do |entry|
            parse_entry(entry, &block)
          end
        end
      end
    end

    # Returns the archive member names for the configured classes and type.
    def target_file_names
      TARGET_FILE_NAMES[@n_classes][@type]
    end

    # Reads fixed-size records from +entry+ until it is exhausted and yields
    # one record object per sample.
    #
    # Each record is read in a single call so that an incomplete trailing
    # record is detected here instead of surfacing later as a NoMethodError
    # on nil or as a record with missing pixel data.
    #
    # Raises RuntimeError when the entry ends in the middle of a record.
    def parse_entry(entry)
      n_labels = @n_classes == 10 ? 1 : 2
      record_class = @n_classes == 10 ? Record10 : Record100
      record_size = n_labels + N_DATA_BYTES

      loop do
        record = entry.read(record_size)
        # A nil (or empty) read means the entry has been fully consumed.
        break if record.nil? || record.empty?
        if record.bytesize < record_size
          raise "Truncated CIFAR-#{@n_classes} record in #{entry.full_name}: " +
                "expected #{record_size} bytes but got #{record.bytesize}"
        end

        labels = record.unpack("C#{n_labels}")
        data = record.byteslice(n_labels, N_DATA_BYTES)
        yield record_class.new(data, *labels)
      end
    end

    # Yields a Gem::Package::TarReader for the gzipped tar file at
    # +data_path+.
    def open_tar(data_path)
      Zlib::GzipReader.open(data_path) do |f|
        Gem::Package::TarReader.new(f) do |tar|
          yield(tar)
        end
      end
    end
  end
end
|
126
|
+
|
data/lib/datasets/dataset.rb
CHANGED
data/lib/datasets/version.rb
CHANGED
data/lib/datasets.rb
CHANGED
data/test/test-cifar.rb
ADDED
@@ -0,0 +1,209 @@
|
|
1
|
+
# Tests Datasets::CIFAR#each against small archives generated on the fly, so
# nothing is downloaded.
class CIFARTest < Test::Unit::TestCase
  include Helper::Sandbox

  # Points @dataset at the sandbox directory and replaces its downloader
  # with one that writes +data+ (a Hash of archive path => content, or
  # :directory for directories) as a gzipped tar file.
  def setup_raw_data(data)
    setup_sandbox

    # These accessors are added to this one dataset instance only.
    @dataset.singleton_class.class_eval do
      attr_accessor :cache_dir_path
      attr_writer :data
    end
    # @tmp_dir is presumably provided by Helper::Sandbox#setup_sandbox.
    @dataset.cache_dir_path = @tmp_dir
    @dataset.data = data

    # Stand-in for the real download: build the archive from @data.
    def @dataset.download(output_path, url)
      Zlib::GzipWriter.open(output_path) do |gzip|
        Gem::Package::TarWriter.new(gzip) do |archive|
          @data.each do |entry_path, body|
            if body == :directory
              archive.mkdir(entry_path, 0755)
              next
            end
            archive.add_file_simple(entry_path, 0644, body.bytesize) do |io|
              io.write(body)
            end
          end
        end
      end
    end
  end

  def teardown
    teardown_sandbox
  end

  sub_test_case("cifar-10") do
    # One binary CIFAR-10 record: a label byte and 3072 identical pixel bytes.
    def create_data(label, pixel)
      [label, *Array.new(3072, pixel)].pack("C*")
    end

    # What #each is expected to produce for a record built by create_data.
    def expected_record(label, pixel)
      pixels = Array.new(3072, pixel)
      {
        :label => label,
        :data => pixels.pack("C*"),
        :pixels => pixels,
      }
    end

    # Every record of @dataset, converted to a plain Hash for comparison.
    def read_records
      @dataset.map do |record|
        {
          :label => record.label,
          :data => record.data,
          :pixels => record.pixels,
        }
      end
    end

    sub_test_case("train") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 10, type: :train)
        directory = "cifar-10-batches-bin"
        raw_data = {directory => :directory}
        # The archive holds six batches; the test below expects only
        # batches 1 to 5 to be read.
        (1..6).each do |i|
          raw_data["#{directory}/data_batch_#{i}.bin"] = create_data(i, i * 10)
        end
        setup_raw_data(raw_data)
      end

      test("#each") do
        expected = (1..5).map { |i| expected_record(i, i * 10) }
        assert_equal(expected, read_records)
      end
    end

    sub_test_case("test") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 10, type: :test)
        directory = "cifar-10-batches-bin"
        batch = create_data(1, 100) + create_data(2, 200)
        setup_raw_data(directory => :directory,
                       "#{directory}/test_batch.bin" => batch)
      end

      test("#each") do
        expected = [
          expected_record(1, 100),
          expected_record(2, 200),
        ]
        assert_equal(expected, read_records)
      end
    end
  end

  sub_test_case("cifar-100") do
    # One binary CIFAR-100 record: coarse and fine label bytes and 3072
    # identical pixel bytes.
    def create_data(coarse_label, fine_label, pixel)
      [coarse_label, fine_label, *Array.new(3072, pixel)].pack("C*")
    end

    # What #each is expected to produce for a record built by create_data.
    def expected_record(coarse_label, fine_label, pixel)
      pixels = Array.new(3072, pixel)
      {
        :coarse_label => coarse_label,
        :fine_label => fine_label,
        :data => pixels.pack("C*"),
        :pixels => pixels,
      }
    end

    # Every record of @dataset, converted to a plain Hash for comparison.
    def read_records
      @dataset.map do |record|
        {
          :coarse_label => record.coarse_label,
          :fine_label => record.fine_label,
          :data => record.data,
          :pixels => record.pixels,
        }
      end
    end

    sub_test_case("train") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 100, type: :train)
        directory = "cifar-100-binary"
        batch = create_data(1, 11, 10) + create_data(2, 22, 20)
        setup_raw_data(directory => :directory,
                       "#{directory}/train.bin" => batch)
      end

      test("#each") do
        expected = [
          expected_record(1, 11, 10),
          expected_record(2, 22, 20),
        ]
        assert_equal(expected, read_records)
      end
    end

    sub_test_case("test") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 100, type: :test)
        directory = "cifar-100-binary"
        batch = create_data(1, 11, 100) + create_data(6, 66, 200)
        setup_raw_data(directory => :directory,
                       "#{directory}/test.bin" => batch)
      end

      test("#each") do
        expected = [
          expected_record(1, 11, 100),
          expected_record(6, 66, 200),
        ]
        assert_equal(expected, read_records)
      end
    end
  end
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: red-datasets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- tomisuker
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2018-03-27 00:00:00.000000000 Z
|
12
|
+
date: 2018-05-03 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -99,6 +99,7 @@ files:
|
|
99
99
|
- Rakefile
|
100
100
|
- doc/text/news.md
|
101
101
|
- lib/datasets.rb
|
102
|
+
- lib/datasets/cifar.rb
|
102
103
|
- lib/datasets/dataset.rb
|
103
104
|
- lib/datasets/downloader.rb
|
104
105
|
- lib/datasets/iris.rb
|
@@ -109,6 +110,7 @@ files:
|
|
109
110
|
- red-datasets.gemspec
|
110
111
|
- test/helper.rb
|
111
112
|
- test/run-test.rb
|
113
|
+
- test/test-cifar.rb
|
112
114
|
- test/test-iris.rb
|
113
115
|
- test/test-table.rb
|
114
116
|
- test/test-wikipedia.rb
|
@@ -142,3 +144,4 @@ test_files:
|
|
142
144
|
- test/helper.rb
|
143
145
|
- test/run-test.rb
|
144
146
|
- test/test-table.rb
|
147
|
+
- test/test-cifar.rb
|