red-datasets 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d071f7be3f241f1fb4327e63666c35879488f47c63e9844f8e86d099e385d79
4
- data.tar.gz: 7e688dfc0ccc9d0ca8bc0070eef71dee1f3e7732e8887d443b37a577467dbf75
3
+ metadata.gz: 56c3132aec99f8dab763d68d0b731df0e5b2c97ea9f88a847021210fda94b9ac
4
+ data.tar.gz: 20e11d381718b801dac9c155385837b770c5067646ac235ade9c0f06f4d23cb8
5
5
  SHA512:
6
- metadata.gz: f71bf4fbb25332709d4ef4c8ddc1121781ecac05097551d336091c875a5c885fd20bbba658b41085faf3d1433c29ece43d458c104c300f6eeaa0d8088eae6377
7
- data.tar.gz: cca27dc33ed60f0093bcf940068590df4fb0848b6f40ba265b9bf53316c888267d8c561248fdc5c755cbf5c93ded2a5068d6f8369e5ec6951bee367c552d8677
6
+ metadata.gz: 1c3ccf7b09e6064cacc47c04dd972ef4ba9318278630446ae60911412e9dc5dced5c988423b78a1c0ae68d71ede6136ebd2ee9c86f4247fd56bbf940f7997f7e
7
+ data.tar.gz: bebe49f31022dfeab1eb7626ef1eaf722d5dedf373dab89ba79e3e2afb4849ace50b5098688e4d0480f22719a758a086c288c600cdcb358099a7aa9d1e60eab1
data/README.md CHANGED
@@ -37,6 +37,44 @@ iris.each do |record|
37
37
  end
38
38
  ```
39
39
 
40
+ Here is an example of accessing the CIFAR dataset with `#each`:
41
+
42
+ **CIFAR-10**
43
+
44
+ ```ruby
45
+ require "datasets"
46
+
47
+ cifar = Datasets::CIFAR.new(n_classes: 10, type: :train)
48
+ cifar.metadata
49
+ #=> #<struct Datasets::Metadata name="CIFAR-10", url="https://www.cs.toronto.edu/~kriz/cifar.html", licenses=nil, description="CIFAR-10 is 32x32 image dataset">
50
+ cifar.each do |record|
51
+ p record.pixels
52
+ # => [59, 43, 50, 68, 98, 119, 139, 145, 149, 143, .....]
53
+ p record.label
54
+ # => 6
55
+ end
56
+ end
57
+ ```
58
+
59
+ **CIFAR-100**
60
+
61
+ ```ruby
62
+ require "datasets"
63
+
64
+ cifar = Datasets::CIFAR.new(n_classes: 100, type: :test)
65
+ cifar.metadata
66
+ #=> #<struct Datasets::Metadata name="CIFAR-100", url="https://www.cs.toronto.edu/~kriz/cifar.html", licenses=nil, description="CIFAR-100 is 32x32 image dataset">
67
+ cifar.each do |record|
68
+ p record.pixels
69
+ #=> [199, 196, 195, 195, 196, 197, 198, 198, 199, .....]
70
+ p record.coarse_label
71
+ #=> 10
72
+ p record.fine_label
73
+ #=> 49
74
+ end
75
+ end
76
+ ```
77
+
40
78
  ## License
41
79
 
42
80
  The MIT license. See `LICENSE.txt` for details.
data/doc/text/news.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # News
2
2
 
3
+ ## 0.0.4 - 2018-05-03
4
+
5
+ ### Improvements
6
+
7
+ * `Datasets::Dataset`: Made enumerable.
8
+
9
+ * `Datasets::CIFAR`: Added the CIFAR dataset.
10
+ [GitHub#7][GitHub#8][GitHub#9][GitHub#10]
11
+ [Patch by Yusaku Hatanaka]
12
+
13
+ ### Thanks
14
+
15
+ * Yusaku Hatanaka
16
+
3
17
  ## 0.0.3 - 2018-03-27
4
18
 
5
19
  ### Improvements
@@ -0,0 +1,126 @@
1
+ require "rubygems/package"
2
+ require "zlib"
3
+
4
+ require_relative "dataset"
5
+
6
+ module Datasets
7
+ class CIFAR < Dataset
8
+ class Record10 < Struct.new(:data, :label)
9
+ def pixels
10
+ data.unpack("C*")
11
+ end
12
+ end
13
+
14
+ class Record100 < Struct.new(:data, :coarse_label, :fine_label)
15
+ def pixels
16
+ data.unpack("C*")
17
+ end
18
+ end
19
+
20
+ def initialize(n_classes: 10, type: :train)
21
+ unless [10, 100].include?(n_classes)
22
+ raise 'Please set n_classes 10 or 100'
23
+ end
24
+ unless [:train, :test].include?(type)
25
+ raise 'Please set type :train or :test'
26
+ end
27
+
28
+ super()
29
+
30
+ @metadata.name = "CIFAR-#{n_classes}"
31
+ @metadata.url = "https://www.cs.toronto.edu/~kriz/cifar.html"
32
+ @metadata.description = "CIFAR-#{n_classes} is 32x32 image dataset"
33
+
34
+ @n_classes = n_classes
35
+ @type = type
36
+ end
37
+
38
+ def each(&block)
39
+ return to_enum(__method__) unless block_given?
40
+
41
+ data_path = cache_dir_path + "cifar-#{@n_classes}.tar.gz"
42
+ unless data_path.exist?
43
+ data_url = "https://www.cs.toronto.edu/~kriz/cifar-#{@n_classes}-binary.tar.gz"
44
+ download(data_path, data_url)
45
+ end
46
+
47
+ parse_data(data_path, &block)
48
+ end
49
+
50
+ private
51
+
52
+ def parse_data(data_path, &block)
53
+ open_tar(data_path) do |tar|
54
+ target_file_names.each do |target_file_name|
55
+ tar.seek(target_file_name) do |entry|
56
+ parse_entry(entry, &block)
57
+ end
58
+ end
59
+ end
60
+ end
61
+
62
+ def target_file_names
63
+ case @n_classes
64
+ when 10
65
+ prefix = 'cifar-10-batches-bin'
66
+ case @type
67
+ when :train
68
+ [
69
+ "#{prefix}/data_batch_1.bin",
70
+ "#{prefix}/data_batch_2.bin",
71
+ "#{prefix}/data_batch_3.bin",
72
+ "#{prefix}/data_batch_4.bin",
73
+ "#{prefix}/data_batch_5.bin",
74
+ ]
75
+ when :test
76
+ [
77
+ "#{prefix}/test_batch.bin"
78
+ ]
79
+ end
80
+ when 100
81
+ prefix = "cifar-100-binary"
82
+ case @type
83
+ when :train
84
+ [
85
+ "#{prefix}/train.bin",
86
+ ]
87
+ when :test
88
+ [
89
+ "#{prefix}/test.bin",
90
+ ]
91
+ end
92
+ end
93
+ end
94
+
95
+ def parse_entry(entry)
96
+ case @n_classes
97
+ when 10
98
+ loop do
99
+ label = entry.read(1)
100
+ break if label.nil?
101
+ label = label.unpack("C")[0]
102
+ data = entry.read(3072)
103
+ yield Record10.new(data, label)
104
+ end
105
+ when 100
106
+ loop do
107
+ coarse_label = entry.read(1)
108
+ break if coarse_label.nil?
109
+ coarse_label = coarse_label.unpack("C")[0]
110
+ fine_label = entry.read(1).unpack("C")[0]
111
+ data = entry.read(3072)
112
+ yield Record100.new(data, coarse_label, fine_label)
113
+ end
114
+ end
115
+ end
116
+
117
+ def open_tar(data_path)
118
+ Zlib::GzipReader.open(data_path) do |f|
119
+ Gem::Package::TarReader.new(f) do |tar|
120
+ yield(tar)
121
+ end
122
+ end
123
+ end
124
+ end
125
+ end
126
+
@@ -6,6 +6,8 @@ require_relative "table"
6
6
 
7
7
  module Datasets
8
8
  class Dataset
9
+ include Enumerable
10
+
9
11
  attr_reader :metadata
10
12
  def initialize
11
13
  @metadata = Metadata.new
@@ -1,3 +1,3 @@
1
1
  module Datasets
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/datasets.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "datasets/version"
2
2
 
3
+ require "datasets/cifar"
3
4
  require "datasets/iris"
4
5
  require "datasets/wikipedia"
@@ -0,0 +1,209 @@
1
+ class CIFARTest < Test::Unit::TestCase
2
+ include Helper::Sandbox
3
+
4
+ def setup_raw_data(data)
5
+ setup_sandbox
6
+
7
+ def @dataset.cache_dir_path
8
+ @cache_dir_path
9
+ end
10
+ def @dataset.cache_dir_path=(path)
11
+ @cache_dir_path = path
12
+ end
13
+ @dataset.cache_dir_path = @tmp_dir
14
+
15
+ def @dataset.data=(data)
16
+ @data = data
17
+ end
18
+ @dataset.data = data
19
+
20
+ def @dataset.download(output_path, url)
21
+ Zlib::GzipWriter.open(output_path) do |gz|
22
+ Gem::Package::TarWriter.new(gz) do |tar|
23
+ @data.each do |path, content|
24
+ if content == :directory
25
+ tar.mkdir(path, 0755)
26
+ else
27
+ tar.add_file_simple(path, 0644, content.bytesize) do |file|
28
+ file.write(content)
29
+ end
30
+ end
31
+ end
32
+ end
33
+ end
34
+ end
35
+ end
36
+
37
+ def teardown
38
+ teardown_sandbox
39
+ end
40
+
41
+ sub_test_case("cifar-10") do
42
+ def create_data(label, pixel)
43
+ [label].pack("C") + ([pixel] * 3072).pack("C*")
44
+ end
45
+
46
+ sub_test_case("train") do
47
+ def setup
48
+ @dataset = Datasets::CIFAR.new(n_classes: 10, type: :train)
49
+ directory = "cifar-10-batches-bin"
50
+ setup_raw_data(directory => :directory,
51
+ "#{directory}/data_batch_1.bin" => create_data(1, 10),
52
+ "#{directory}/data_batch_2.bin" => create_data(2, 20),
53
+ "#{directory}/data_batch_3.bin" => create_data(3, 30),
54
+ "#{directory}/data_batch_4.bin" => create_data(4, 40),
55
+ "#{directory}/data_batch_5.bin" => create_data(5, 50),
56
+ "#{directory}/data_batch_6.bin" => create_data(6, 60))
57
+ end
58
+
59
+ test("#each") do
60
+ raw_dataset = @dataset.collect do |record|
61
+ {
62
+ :label => record.label,
63
+ :data => record.data,
64
+ :pixels => record.pixels
65
+ }
66
+ end
67
+ assert_equal([
68
+ {
69
+ :label => 1,
70
+ :data => ([10] * 3072).pack("C*"),
71
+ :pixels => [10] * 3072
72
+ },
73
+ {
74
+ :label => 2,
75
+ :data => ([20] * 3072).pack("C*"),
76
+ :pixels => [20] * 3072
77
+ },
78
+ {
79
+ :label => 3,
80
+ :data => ([30] * 3072).pack("C*"),
81
+ :pixels => [30] * 3072
82
+ },
83
+ {
84
+ :label => 4,
85
+ :data => ([40] * 3072).pack("C*"),
86
+ :pixels => [40] * 3072
87
+ },
88
+ {
89
+ :label => 5,
90
+ :data => ([50] * 3072).pack("C*"),
91
+ :pixels => [50] * 3072
92
+ },
93
+ ],
94
+ raw_dataset)
95
+ end
96
+ end
97
+
98
+ sub_test_case("test") do
99
+ def setup
100
+ @dataset = Datasets::CIFAR.new(n_classes: 10, type: :test)
101
+ directory = "cifar-10-batches-bin"
102
+ data = create_data(1, 100) + create_data(2, 200)
103
+ setup_raw_data(directory => :directory,
104
+ "#{directory}/test_batch.bin" => data)
105
+ end
106
+
107
+ test("#each") do
108
+ raw_dataset = @dataset.collect do |record|
109
+ {
110
+ :label => record.label,
111
+ :data => record.data,
112
+ :pixels => record.pixels,
113
+ }
114
+ end
115
+ assert_equal([
116
+ {
117
+ :label => 1,
118
+ :data => ([100] * 3072).pack("C*"),
119
+ :pixels => [100] * 3072
120
+ },
121
+ {
122
+ :label => 2,
123
+ :data => ([200] * 3072).pack("C*"),
124
+ :pixels => [200] * 3072
125
+ },
126
+ ],
127
+ raw_dataset)
128
+ end
129
+ end
130
+ end
131
+
132
+ sub_test_case("cifar-100") do
133
+ def create_data(coarse_label, fine_label, pixel)
134
+ [coarse_label, fine_label].pack("C*") + ([pixel] * 3072).pack("C*")
135
+ end
136
+
137
+ sub_test_case("train") do
138
+ def setup
139
+ @dataset = Datasets::CIFAR.new(n_classes: 100, type: :train)
140
+ directory = "cifar-100-binary"
141
+ data = create_data(1, 11, 10) + create_data(2, 22, 20)
142
+ setup_raw_data(directory => :directory,
143
+ "#{directory}/train.bin" => data)
144
+ end
145
+
146
+ test("#each") do
147
+ raw_dataset = @dataset.collect do |record|
148
+ {
149
+ :coarse_label => record.coarse_label,
150
+ :fine_label => record.fine_label,
151
+ :data => record.data,
152
+ :pixels => record.pixels,
153
+ }
154
+ end
155
+ assert_equal([
156
+ {
157
+ :coarse_label => 1,
158
+ :fine_label => 11,
159
+ :data => ([10] * 3072).pack("C*"),
160
+ :pixels => [10] * 3072
161
+ },
162
+ {
163
+ :coarse_label => 2,
164
+ :fine_label => 22,
165
+ :data => ([20] * 3072).pack("C*"),
166
+ :pixels => [20] * 3072
167
+ },
168
+ ],
169
+ raw_dataset)
170
+ end
171
+ end
172
+
173
+ sub_test_case("test") do
174
+ def setup
175
+ @dataset = Datasets::CIFAR.new(n_classes: 100, type: :test)
176
+ directory = "cifar-100-binary"
177
+ data = create_data(1, 11, 100) + create_data(6, 66, 200)
178
+ setup_raw_data(directory => :directory,
179
+ "#{directory}/test.bin" => data)
180
+ end
181
+
182
+ test("#each") do
183
+ raw_dataset = @dataset.collect do |record|
184
+ {
185
+ :coarse_label => record.coarse_label,
186
+ :fine_label => record.fine_label,
187
+ :data => record.data,
188
+ :pixels => record.pixels,
189
+ }
190
+ end
191
+ assert_equal([
192
+ {
193
+ :coarse_label => 1,
194
+ :fine_label => 11,
195
+ :data => ([100] * 3072).pack("C*"),
196
+ :pixels => [100] * 3072
197
+ },
198
+ {
199
+ :coarse_label => 6,
200
+ :fine_label => 66,
201
+ :data => ([200] * 3072).pack("C*"),
202
+ :pixels => [200] * 3072
203
+ },
204
+ ],
205
+ raw_dataset)
206
+ end
207
+ end
208
+ end
209
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-03-27 00:00:00.000000000 Z
12
+ date: 2018-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -99,6 +99,7 @@ files:
99
99
  - Rakefile
100
100
  - doc/text/news.md
101
101
  - lib/datasets.rb
102
+ - lib/datasets/cifar.rb
102
103
  - lib/datasets/dataset.rb
103
104
  - lib/datasets/downloader.rb
104
105
  - lib/datasets/iris.rb
@@ -109,6 +110,7 @@ files:
109
110
  - red-datasets.gemspec
110
111
  - test/helper.rb
111
112
  - test/run-test.rb
113
+ - test/test-cifar.rb
112
114
  - test/test-iris.rb
113
115
  - test/test-table.rb
114
116
  - test/test-wikipedia.rb
@@ -142,3 +144,4 @@ test_files:
142
144
  - test/helper.rb
143
145
  - test/run-test.rb
144
146
  - test/test-table.rb
147
+ - test/test-cifar.rb