red-datasets 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d071f7be3f241f1fb4327e63666c35879488f47c63e9844f8e86d099e385d79
4
- data.tar.gz: 7e688dfc0ccc9d0ca8bc0070eef71dee1f3e7732e8887d443b37a577467dbf75
3
+ metadata.gz: 56c3132aec99f8dab763d68d0b731df0e5b2c97ea9f88a847021210fda94b9ac
4
+ data.tar.gz: 20e11d381718b801dac9c155385837b770c5067646ac235ade9c0f06f4d23cb8
5
5
  SHA512:
6
- metadata.gz: f71bf4fbb25332709d4ef4c8ddc1121781ecac05097551d336091c875a5c885fd20bbba658b41085faf3d1433c29ece43d458c104c300f6eeaa0d8088eae6377
7
- data.tar.gz: cca27dc33ed60f0093bcf940068590df4fb0848b6f40ba265b9bf53316c888267d8c561248fdc5c755cbf5c93ded2a5068d6f8369e5ec6951bee367c552d8677
6
+ metadata.gz: 1c3ccf7b09e6064cacc47c04dd972ef4ba9318278630446ae60911412e9dc5dced5c988423b78a1c0ae68d71ede6136ebd2ee9c86f4247fd56bbf940f7997f7e
7
+ data.tar.gz: bebe49f31022dfeab1eb7626ef1eaf722d5dedf373dab89ba79e3e2afb4849ace50b5098688e4d0480f22719a758a086c288c600cdcb358099a7aa9d1e60eab1
data/README.md CHANGED
@@ -37,6 +37,44 @@ iris.each do |record|
37
37
  end
38
38
  ```
39
39
 
40
+ Here is an example of accessing the CIFAR dataset with `#each`:
41
+
42
+ **CIFAR-10**
43
+
44
+ ```ruby
45
+ require "datasets"
46
+
47
+ cifar = Datasets::CIFAR.new(n_classes: 10, type: :train)
48
+ cifar.metadata
49
+ #=> #<struct Datasets::Metadata name="CIFAR-10", url="https://www.cs.toronto.edu/~kriz/cifar.html", licenses=nil, description="CIFAR-10 is 32x32 image dataset">
50
+ cifar.each do |record|
51
+ p record.pixels
52
+ # => [59, 43, 50, 68, 98, 119, 139, 145, 149, 143, .....]
53
+ p record.label
54
+ # => 6
55
+ end
56
+
57
+ ```
58
+
59
+ **CIFAR-100**
60
+
61
+ ```ruby
62
+ require "datasets"
63
+
64
+ cifar = Datasets::CIFAR.new(n_classes: 100, type: :test)
65
+ cifar.metadata
66
+ #=> #<struct Datasets::Metadata name="CIFAR-100", url="https://www.cs.toronto.edu/~kriz/cifar.html", licenses=nil, description="CIFAR-100 is 32x32 image dataset">
67
+ cifar.each do |record|
68
+ p record.pixels
69
+ #=> [199, 196, 195, 195, 196, 197, 198, 198, 199, .....]
70
+ p record.coarse_label
71
+ #=> 10
72
+ p record.fine_label
73
+ #=> 49
74
+ end
75
+
76
+ ```
77
+
40
78
  ## License
41
79
 
42
80
  The MIT license. See `LICENSE.txt` for details.
data/doc/text/news.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # News
2
2
 
3
+ ## 0.0.4 - 2018-05-03
4
+
5
+ ### Improvements
6
+
7
+ * `Datasets::Dataset`: Made enumerable.
8
+
9
+ * `Datasets::CIFAR`: Added the CIFAR dataset.
10
+ [GitHub#7][GitHub#8][GitHub#9][GitHub#10]
11
+ [Patch by Yusaku Hatanaka]
12
+
13
+ ### Thanks
14
+
15
+ * Yusaku Hatanaka
16
+
3
17
  ## 0.0.3 - 2018-03-27
4
18
 
5
19
  ### Improvements
@@ -0,0 +1,126 @@
1
+ require "rubygems/package"
2
+ require "zlib"
3
+
4
+ require_relative "dataset"
5
+
6
module Datasets
  # Reader for the binary distributions of the CIFAR-10 and CIFAR-100 image
  # datasets described at https://www.cs.toronto.edu/~kriz/cifar.html.
  #
  # The archive is downloaded into the cache directory the first time #each
  # runs; afterwards the cached copy is reused. Records are read from the
  # archive one at a time and yielded as Record10 or Record100 structs.
  #
  # @example
  #   cifar = Datasets::CIFAR.new(n_classes: 10, type: :train)
  #   cifar.each do |record|
  #     record.label   # Integer
  #     record.pixels  # Array of Integers
  #   end
  class CIFAR < Dataset
    # Number of pixel bytes read after the label byte(s) of every record.
    # NOTE(review): presumably 32 * 32 pixels * 3 color channels; the
    # metadata description only states "32x32 image" -- confirm.
    PIXEL_BYTES = 3072

    # One CIFAR-10 record: the raw pixel bytes plus a single numeric label.
    Record10 = Struct.new(:data, :label) do
      # @return [Array<Integer>] every byte of +data+ as an unsigned integer.
      def pixels
        data.unpack("C*")
      end
    end

    # One CIFAR-100 record: the raw pixel bytes plus a coarse and a fine
    # numeric label.
    Record100 = Struct.new(:data, :coarse_label, :fine_label) do
      # @return [Array<Integer>] every byte of +data+ as an unsigned integer.
      def pixels
        data.unpack("C*")
      end
    end

    # @param n_classes [Integer] which dataset to read: 10 or 100.
    # @param type [Symbol] which split to read: :train or :test.
    # @raise [RuntimeError] if either argument has any other value.
    def initialize(n_classes: 10, type: :train)
      unless [10, 100].include?(n_classes)
        raise 'Please set n_classes 10 or 100'
      end
      unless [:train, :test].include?(type)
        raise 'Please set type :train or :test'
      end

      super()

      @metadata.name = "CIFAR-#{n_classes}"
      @metadata.url = "https://www.cs.toronto.edu/~kriz/cifar.html"
      @metadata.description = "CIFAR-#{n_classes} is 32x32 image dataset"

      @n_classes = n_classes
      @type = type
    end

    # Yields every record of the selected dataset and split.
    #
    # @yieldparam record [Record10, Record100]
    # @return [Enumerator] when called without a block.
    # @raise [EOFError] if the archive data ends in the middle of a record.
    def each(&block)
      return to_enum(__method__) unless block_given?

      # cache_dir_path and download are not defined in this class; they are
      # presumably inherited from Dataset.
      data_path = cache_dir_path + "cifar-#{@n_classes}.tar.gz"
      unless data_path.exist?
        data_url = "https://www.cs.toronto.edu/~kriz/cifar-#{@n_classes}-binary.tar.gz"
        download(data_path, data_url)
      end

      parse_data(data_path, &block)
    end

    private

    # Opens the archive at +data_path+ and parses each target file in the
    # order given by #target_file_names.
    def parse_data(data_path, &block)
      open_tar(data_path) do |tar|
        target_file_names.each do |target_file_name|
          tar.seek(target_file_name) do |entry|
            parse_entry(entry, &block)
          end
        end
      end
    end

    # @return [Array<String>] paths inside the archive that hold the records
    #   of the selected dataset and split.
    def target_file_names
      case @n_classes
      when 10
        directory = "cifar-10-batches-bin"
        case @type
        when :train
          (1..5).map { |i| "#{directory}/data_batch_#{i}.bin" }
        when :test
          ["#{directory}/test_batch.bin"]
        end
      when 100
        # The file is named after the split: train.bin or test.bin.
        ["cifar-100-binary/#{@type}.bin"]
      end
    end

    # Reads records from +entry+ until it is exhausted and yields each one.
    #
    # A record is one label byte (CIFAR-10) or two label bytes (CIFAR-100,
    # coarse then fine) followed by PIXEL_BYTES bytes of pixel data.
    #
    # @raise [EOFError] if the data stops part-way through a record, instead
    #   of yielding a record with missing or short pixel data.
    def parse_entry(entry)
      record_class, n_label_bytes =
        case @n_classes
        when 10 then [Record10, 1]
        when 100 then [Record100, 2]
        end

      loop do
        labels = entry.read(n_label_bytes)
        # No bytes left at a record boundary: normal end of the entry.
        break if labels.nil? || labels.empty?

        data = entry.read(PIXEL_BYTES)
        unless labels.bytesize == n_label_bytes &&
               data && data.bytesize == PIXEL_BYTES
          raise EOFError,
                "CIFAR-#{@n_classes} data ends in the middle of a record"
        end

        yield record_class.new(data, *labels.unpack("C*"))
      end
    end

    # Yields a tar reader over the gzip-compressed archive at +data_path+.
    # Both block forms are used so the readers are released when the block
    # returns.
    def open_tar(data_path)
      Zlib::GzipReader.open(data_path) do |gzip|
        Gem::Package::TarReader.new(gzip) do |tar|
          yield(tar)
        end
      end
    end
  end
end
126
+
@@ -6,6 +6,8 @@ require_relative "table"
6
6
 
7
7
  module Datasets
8
8
  class Dataset
9
+ include Enumerable
10
+
9
11
  attr_reader :metadata
10
12
  def initialize
11
13
  @metadata = Metadata.new
@@ -1,3 +1,3 @@
1
1
  module Datasets
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
data/lib/datasets.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  require "datasets/version"
2
2
 
3
+ require "datasets/cifar"
3
4
  require "datasets/iris"
4
5
  require "datasets/wikipedia"
@@ -0,0 +1,209 @@
1
# Tests Datasets::CIFAR#each against small hand-built archives, so no network
# access is needed.
class CIFARTest < Test::Unit::TestCase
  include Helper::Sandbox

  # Makes @dataset (assigned by the calling setup) read from a fake archive.
  #
  # +data+ maps archive paths to either :directory or the file content.
  # The dataset's download is replaced by a writer that builds a .tar.gz
  # from +data+, and its cache directory is pointed at @tmp_dir.
  # NOTE(review): @tmp_dir is presumably created by setup_sandbox -- confirm.
  def setup_raw_data(data)
    setup_sandbox

    class << @dataset
      attr_accessor :cache_dir_path
      attr_writer :data

      def download(output_path, url)
        Zlib::GzipWriter.open(output_path) do |gzip|
          Gem::Package::TarWriter.new(gzip) do |tar|
            @data.each do |path, content|
              if content == :directory
                tar.mkdir(path, 0755)
              else
                tar.add_file_simple(path, 0644, content.bytesize) do |file|
                  file.write(content)
                end
              end
            end
          end
        end
      end
    end

    @dataset.cache_dir_path = @tmp_dir
    @dataset.data = data
  end

  def teardown
    teardown_sandbox
  end

  sub_test_case("cifar-10") do
    # Builds one raw record: a label byte followed by 3072 identical pixels.
    def create_data(label, pixel)
      ([label] + Array.new(3072, pixel)).pack("C*")
    end

    # The hash a record built by create_data(label, pixel) should map to.
    def expected_record(label, pixel)
      pixels = Array.new(3072, pixel)
      {label: label, data: pixels.pack("C*"), pixels: pixels}
    end

    # Reads the whole dataset into comparable hashes.
    def read_records
      @dataset.map do |record|
        {label: record.label, data: record.data, pixels: record.pixels}
      end
    end

    sub_test_case("train") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 10, type: :train)
        directory = "cifar-10-batches-bin"
        raw_data = {directory => :directory}
        # Batch 6 is deliberately extra: the test expects only batches 1-5.
        (1..6).each do |i|
          raw_data["#{directory}/data_batch_#{i}.bin"] = create_data(i, i * 10)
        end
        setup_raw_data(raw_data)
      end

      test("#each") do
        expected = [[1, 10], [2, 20], [3, 30], [4, 40], [5, 50]].map do |label, pixel|
          expected_record(label, pixel)
        end
        assert_equal(expected, read_records)
      end
    end

    sub_test_case("test") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 10, type: :test)
        directory = "cifar-10-batches-bin"
        records = create_data(1, 100) + create_data(2, 200)
        setup_raw_data(directory => :directory,
                       "#{directory}/test_batch.bin" => records)
      end

      test("#each") do
        expected = [
          expected_record(1, 100),
          expected_record(2, 200),
        ]
        assert_equal(expected, read_records)
      end
    end
  end

  sub_test_case("cifar-100") do
    # Builds one raw record: coarse label byte, fine label byte, then 3072
    # identical pixels.
    def create_data(coarse_label, fine_label, pixel)
      ([coarse_label, fine_label] + Array.new(3072, pixel)).pack("C*")
    end

    # The hash a record built by create_data with the same arguments should
    # map to.
    def expected_record(coarse_label, fine_label, pixel)
      pixels = Array.new(3072, pixel)
      {
        coarse_label: coarse_label,
        fine_label: fine_label,
        data: pixels.pack("C*"),
        pixels: pixels,
      }
    end

    # Reads the whole dataset into comparable hashes.
    def read_records
      @dataset.map do |record|
        {
          coarse_label: record.coarse_label,
          fine_label: record.fine_label,
          data: record.data,
          pixels: record.pixels,
        }
      end
    end

    sub_test_case("train") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 100, type: :train)
        directory = "cifar-100-binary"
        records = create_data(1, 11, 10) + create_data(2, 22, 20)
        setup_raw_data(directory => :directory,
                       "#{directory}/train.bin" => records)
      end

      test("#each") do
        expected = [
          expected_record(1, 11, 10),
          expected_record(2, 22, 20),
        ]
        assert_equal(expected, read_records)
      end
    end

    sub_test_case("test") do
      def setup
        @dataset = Datasets::CIFAR.new(n_classes: 100, type: :test)
        directory = "cifar-100-binary"
        records = create_data(1, 11, 100) + create_data(6, 66, 200)
        setup_raw_data(directory => :directory,
                       "#{directory}/test.bin" => records)
      end

      test("#each") do
        expected = [
          expected_record(1, 11, 100),
          expected_record(6, 66, 200),
        ]
        assert_equal(expected, read_records)
      end
    end
  end
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: red-datasets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - tomisuker
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2018-03-27 00:00:00.000000000 Z
12
+ date: 2018-05-03 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -99,6 +99,7 @@ files:
99
99
  - Rakefile
100
100
  - doc/text/news.md
101
101
  - lib/datasets.rb
102
+ - lib/datasets/cifar.rb
102
103
  - lib/datasets/dataset.rb
103
104
  - lib/datasets/downloader.rb
104
105
  - lib/datasets/iris.rb
@@ -109,6 +110,7 @@ files:
109
110
  - red-datasets.gemspec
110
111
  - test/helper.rb
111
112
  - test/run-test.rb
113
+ - test/test-cifar.rb
112
114
  - test/test-iris.rb
113
115
  - test/test-table.rb
114
116
  - test/test-wikipedia.rb
@@ -142,3 +144,4 @@ test_files:
142
144
  - test/helper.rb
143
145
  - test/run-test.rb
144
146
  - test/test-table.rb
147
+ - test/test-cifar.rb