mnist-learn 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +12 -12
- data/CODE_OF_CONDUCT.md +13 -13
- data/Gemfile +4 -4
- data/LICENSE.txt +21 -21
- data/README.md +52 -52
- data/Rakefile +1 -1
- data/exe/mnist2csv +45 -45
- data/lib/mnist-learn.rb +205 -205
- data/lib/mnist-learn/version.rb +3 -3
- data/mnist-learn.gemspec +27 -27
- data/script/console +14 -14
- data/script/setup +7 -7
- metadata +7 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 46aa06de33312b48f78e42e0b79c5dd19bf636412c2fa6645d6c5b2182966017
|
4
|
+
data.tar.gz: 3843bbc99283de705db7a40ba1c3256cac87bfab0abfb537a4fb327a16a84579
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 602847ccd6f1621ce40085c387f46309f299bc6508a7c2dfe747f88f35f0fea782af187d1d491823e5bc834bebb75eea9f2f741609567fec49d08d98cb5899dc
|
7
|
+
data.tar.gz: b160252a352fb515bc4def2ade4114f4508b904c1e68c22e865c9fcd033ee33b8d8a25320f0953deb0dc1d302029ba87fee5f33b6513dd9c37b5830dcaab2561
|
data/.gitignore
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
/.bundle/
|
2
|
-
/.yardoc
|
3
|
-
/Gemfile.lock
|
4
|
-
/_yardoc/
|
5
|
-
/coverage/
|
6
|
-
/doc/
|
7
|
-
/pkg/
|
8
|
-
/spec/reports/
|
9
|
-
/spec/fixtures/data
|
10
|
-
/tmp/
|
11
|
-
/data
|
12
|
-
.rspec_status
|
1
|
+
/.bundle/
|
2
|
+
/.yardoc
|
3
|
+
/Gemfile.lock
|
4
|
+
/_yardoc/
|
5
|
+
/coverage/
|
6
|
+
/doc/
|
7
|
+
/pkg/
|
8
|
+
/spec/reports/
|
9
|
+
/spec/fixtures/data
|
10
|
+
/tmp/
|
11
|
+
/data
|
12
|
+
.rspec_status
|
data/CODE_OF_CONDUCT.md
CHANGED
@@ -1,13 +1,13 @@
|
|
1
|
-
# Contributor Code of Conduct
|
2
|
-
|
3
|
-
As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
|
4
|
-
|
5
|
-
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, age, or religion.
|
6
|
-
|
7
|
-
Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
|
8
|
-
|
9
|
-
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
|
10
|
-
|
11
|
-
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
|
12
|
-
|
13
|
-
This Code of Conduct is adapted from the [Contributor Covenant](http:contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
|
1
|
+
# Contributor Code of Conduct
|
2
|
+
|
3
|
+
As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
|
4
|
+
|
5
|
+
We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, age, or religion.
|
6
|
+
|
7
|
+
Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
|
8
|
+
|
9
|
+
Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
|
10
|
+
|
11
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
|
12
|
+
|
13
|
+
This Code of Conduct is adapted from the [Contributor Covenant](http:contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
|
data/Gemfile
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
source 'https://rubygems.org'
|
2
|
-
|
3
|
-
# Specify your gem's dependencies in mnist.gemspec
|
4
|
-
gemspec
|
1
|
+
source 'https://rubygems.org'
|
2
|
+
|
3
|
+
# Specify your gem's dependencies in mnist.gemspec
|
4
|
+
gemspec
|
data/LICENSE.txt
CHANGED
@@ -1,21 +1,21 @@
|
|
1
|
-
The MIT License (MIT)
|
2
|
-
|
3
|
-
Copyright (c) 2015 Kenta Murata
|
4
|
-
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
7
|
-
in the Software without restriction, including without limitation the rights
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
10
|
-
furnished to do so, subject to the following conditions:
|
11
|
-
|
12
|
-
The above copyright notice and this permission notice shall be included in
|
13
|
-
all copies or substantial portions of the Software.
|
14
|
-
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
-
THE SOFTWARE.
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2015 Kenta Murata
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
CHANGED
@@ -1,52 +1,52 @@
|
|
1
|
-
# Mnist
|
2
|
-
|
3
|
-
Utility ruby gem for easily loading and parsing the MNIST Database of handwritten digits for machine learning purposes.
|
4
|
-
|
5
|
-
Based on work by kenta murata.
|
6
|
-
|
7
|
-
## Installation
|
8
|
-
|
9
|
-
Add this line to your application's Gemfile:
|
10
|
-
|
11
|
-
```ruby
|
12
|
-
gem 'mnist-learn'
|
13
|
-
```
|
14
|
-
|
15
|
-
And then execute:
|
16
|
-
|
17
|
-
$ bundle
|
18
|
-
|
19
|
-
Or install it yourself as:
|
20
|
-
|
21
|
-
$ gem install mnist-learn
|
22
|
-
|
23
|
-
## Usage
|
24
|
-
|
25
|
-
Load test and training data easily.
|
26
|
-
|
27
|
-
```ruby
|
28
|
-
require 'mnist-learn'
|
29
|
-
|
30
|
-
minst = Mnist.read_data_sets('data', one_hot: false) # auto download test and training archives and store them in /data
|
31
|
-
images = minst.train.images
|
32
|
-
labels = minst.train.labels
|
33
|
-
|
34
|
-
#you can also iterate in batches
|
35
|
-
|
36
|
-
train_images, train_labels = minst.train.next_batch(100)
|
37
|
-
test_images, test_labels = minst.test.next_batch(100)
|
38
|
-
```
|
39
|
-
|
40
|
-
## Development
|
41
|
-
|
42
|
-
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
43
|
-
|
44
|
-
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
45
|
-
|
46
|
-
## Contributing
|
47
|
-
|
48
|
-
1. Fork it ( https://github.com/[my-github-username]/mnist-learn/fork )
|
49
|
-
2. Create your feature branch (`git checkout -b my-new-feature`)
|
50
|
-
3. Commit your changes (`git commit -am 'Add some feature'`)
|
51
|
-
4. Push to the branch (`git push origin my-new-feature`)
|
52
|
-
5. Create a new Pull Request
|
1
|
+
# Mnist
|
2
|
+
|
3
|
+
Utility ruby gem for easily loading and parsing the MNIST Database of handwritten digits for machine learning purposes.
|
4
|
+
|
5
|
+
Based on work by kenta murata.
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
Add this line to your application's Gemfile:
|
10
|
+
|
11
|
+
```ruby
|
12
|
+
gem 'mnist-learn'
|
13
|
+
```
|
14
|
+
|
15
|
+
And then execute:
|
16
|
+
|
17
|
+
$ bundle
|
18
|
+
|
19
|
+
Or install it yourself as:
|
20
|
+
|
21
|
+
$ gem install mnist-learn
|
22
|
+
|
23
|
+
## Usage
|
24
|
+
|
25
|
+
Load test and training data easily.
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
require 'mnist-learn'
|
29
|
+
|
30
|
+
minst = Mnist.read_data_sets('data', one_hot: false) # auto download test and training archives and store them in /data
|
31
|
+
images = minst.train.images
|
32
|
+
labels = minst.train.labels
|
33
|
+
|
34
|
+
#you can also iterate in batches
|
35
|
+
|
36
|
+
train_images, train_labels = minst.train.next_batch(100)
|
37
|
+
test_images, test_labels = minst.test.next_batch(100)
|
38
|
+
```
|
39
|
+
|
40
|
+
## Development
|
41
|
+
|
42
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
43
|
+
|
44
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
45
|
+
|
46
|
+
## Contributing
|
47
|
+
|
48
|
+
1. Fork it ( https://github.com/[my-github-username]/mnist-learn/fork )
|
49
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
50
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
51
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
52
|
+
5. Create a new Pull Request
|
data/Rakefile
CHANGED
@@ -1 +1 @@
|
|
1
|
-
require "bundler/gem_tasks"
|
1
|
+
require "bundler/gem_tasks"
|
data/exe/mnist2csv
CHANGED
@@ -1,45 +1,45 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'bundler/setup'
|
4
|
-
require 'mnist'
|
5
|
-
|
6
|
-
require 'optparse'
|
7
|
-
|
8
|
-
num_sample = nil
|
9
|
-
|
10
|
-
opt = OptionParser.new
|
11
|
-
opt.banner = "Usage: #{$0} [options] IMAGE_FILE LABEL_FILE"
|
12
|
-
opt.version = Mnist::VERSION
|
13
|
-
opt.separator ''
|
14
|
-
opt.separator 'Options:'
|
15
|
-
opt.on('-n', '--sample=N', Integer, 'The number of samples') {|v| num_sample = v }
|
16
|
-
opt.parse!(ARGV)
|
17
|
-
|
18
|
-
nrows, ncols, images = Mnist.load_images(ARGV[0])
|
19
|
-
labels = Mnist.load_labels(ARGV[1])
|
20
|
-
|
21
|
-
if images.length != labels.length
|
22
|
-
raise "The lengths of images and labels are not same"
|
23
|
-
end
|
24
|
-
|
25
|
-
print "y"
|
26
|
-
(nrows*ncols).times do |i|
|
27
|
-
print ",x#{i}"
|
28
|
-
end
|
29
|
-
print "\r\n"
|
30
|
-
|
31
|
-
indices = 0...labels.length
|
32
|
-
indices = [*indices].sample(num_sample) if num_sample
|
33
|
-
|
34
|
-
indices.each do |index|
|
35
|
-
label = labels[index]
|
36
|
-
print label
|
37
|
-
|
38
|
-
image = images[index]
|
39
|
-
pixels = image.unpack('C*')
|
40
|
-
pixels.each do |pixel|
|
41
|
-
print ",#{pixel}"
|
42
|
-
end
|
43
|
-
|
44
|
-
print "\r\n"
|
45
|
-
end
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bundler/setup'
|
4
|
+
require 'mnist'
|
5
|
+
|
6
|
+
require 'optparse'
|
7
|
+
|
8
|
+
num_sample = nil
|
9
|
+
|
10
|
+
opt = OptionParser.new
|
11
|
+
opt.banner = "Usage: #{$0} [options] IMAGE_FILE LABEL_FILE"
|
12
|
+
opt.version = Mnist::VERSION
|
13
|
+
opt.separator ''
|
14
|
+
opt.separator 'Options:'
|
15
|
+
opt.on('-n', '--sample=N', Integer, 'The number of samples') {|v| num_sample = v }
|
16
|
+
opt.parse!(ARGV)
|
17
|
+
|
18
|
+
nrows, ncols, images = Mnist.load_images(ARGV[0])
|
19
|
+
labels = Mnist.load_labels(ARGV[1])
|
20
|
+
|
21
|
+
if images.length != labels.length
|
22
|
+
raise "The lengths of images and labels are not same"
|
23
|
+
end
|
24
|
+
|
25
|
+
print "y"
|
26
|
+
(nrows*ncols).times do |i|
|
27
|
+
print ",x#{i}"
|
28
|
+
end
|
29
|
+
print "\r\n"
|
30
|
+
|
31
|
+
indices = 0...labels.length
|
32
|
+
indices = [*indices].sample(num_sample) if num_sample
|
33
|
+
|
34
|
+
indices.each do |index|
|
35
|
+
label = labels[index]
|
36
|
+
print label
|
37
|
+
|
38
|
+
image = images[index]
|
39
|
+
pixels = image.unpack('C*')
|
40
|
+
pixels.each do |pixel|
|
41
|
+
print ",#{pixel}"
|
42
|
+
end
|
43
|
+
|
44
|
+
print "\r\n"
|
45
|
+
end
|
data/lib/mnist-learn.rb
CHANGED
@@ -1,205 +1,205 @@
|
|
1
|
-
require "mnist-learn/version"
|
2
|
-
require 'fileutils'
|
3
|
-
require 'zlib'
|
4
|
-
require 'net/http'
|
5
|
-
require 'ostruct'
|
6
|
-
|
7
|
-
module Mnist
|
8
|
-
class Error < StandardError; end
|
9
|
-
|
10
|
-
class LoadError < Error; end
|
11
|
-
|
12
|
-
class InvalidMagic < LoadError; end
|
13
|
-
|
14
|
-
class MnistReader
|
15
|
-
def initialize(base_path, one_hot = false)
|
16
|
-
@base_path = base_path
|
17
|
-
@one_hot = one_hot
|
18
|
-
end
|
19
|
-
|
20
|
-
def train
|
21
|
-
load_pair('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
|
22
|
-
end
|
23
|
-
|
24
|
-
def test
|
25
|
-
load_pair('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
|
26
|
-
end
|
27
|
-
|
28
|
-
private
|
29
|
-
|
30
|
-
def load_pair(images, labels)
|
31
|
-
Loader.new(File.join(@base_path, images), File.join(@base_path, labels), @one_hot)
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
class Loader
|
36
|
-
IMAGE_FILE_MAGIC = 2051
|
37
|
-
LABEL_FILE_MAGIC = 2049
|
38
|
-
|
39
|
-
def initialize(filename_image, filename_label, one_hot)
|
40
|
-
@filename_image = filename_image
|
41
|
-
@filename_label = filename_label
|
42
|
-
@one_hot = one_hot
|
43
|
-
@index = 0
|
44
|
-
end
|
45
|
-
|
46
|
-
attr_reader :filename_image, :filename_label
|
47
|
-
|
48
|
-
def load_images
|
49
|
-
check_magic(input_images, IMAGE_FILE_MAGIC)
|
50
|
-
@total_count = read_total_count(input_images)
|
51
|
-
nrows, ncols = read_image_size(input_images)
|
52
|
-
images = @total_count.times.map do
|
53
|
-
read_image(nrows, ncols)
|
54
|
-
end
|
55
|
-
[nrows, ncols, images]
|
56
|
-
end
|
57
|
-
|
58
|
-
def load_labels
|
59
|
-
check_magic(input_labels, LABEL_FILE_MAGIC)
|
60
|
-
@total_count = read_total_count(input_labels)
|
61
|
-
read_labels(input_labels, @total_count)
|
62
|
-
end
|
63
|
-
|
64
|
-
def images
|
65
|
-
@all_images ||= load_images[2]
|
66
|
-
end
|
67
|
-
|
68
|
-
def labels
|
69
|
-
@all_labels ||= (@one_hot ? load_labels.map { |label_data| one_hot_transform(label_data) } : load_labels)
|
70
|
-
end
|
71
|
-
|
72
|
-
def next(batch_size)
|
73
|
-
if @index == 0
|
74
|
-
@rows, @columns, @images = load_images
|
75
|
-
@labels = load_labels
|
76
|
-
end
|
77
|
-
images = []
|
78
|
-
labels = []
|
79
|
-
batch_size.times.each do
|
80
|
-
next if @index >= @total_count
|
81
|
-
image_data = @images[@index]
|
82
|
-
label_data = @labels[@index]
|
83
|
-
image_data.map! { |b| b.to_f / 255.0 }
|
84
|
-
@index += 1
|
85
|
-
images << image_data
|
86
|
-
labels << (@one_hot ? one_hot_transform(label_data) : label_data.to_f)
|
87
|
-
end
|
88
|
-
[images, labels]
|
89
|
-
end
|
90
|
-
|
91
|
-
def next_batch(batch_size, rnd: Random.new)
|
92
|
-
@data_set ||= begin
|
93
|
-
rows, columns, images = load_images
|
94
|
-
labels = load_labels
|
95
|
-
Array.new(images.size) do
|
96
|
-
image_data = images[@index]
|
97
|
-
label_data = labels[@index]
|
98
|
-
image_data.map! { |b| b.to_f / 255.0 }
|
99
|
-
@index += 1
|
100
|
-
[image_data, (@one_hot ? one_hot_transform(label_data) : label_data.to_f)]
|
101
|
-
end
|
102
|
-
end
|
103
|
-
@data_set.shuffle!(random: rnd)
|
104
|
-
batch = @data_set[0...batch_size]
|
105
|
-
[batch.map { |v| v[0]}, batch.map { |v| v[1]}]
|
106
|
-
end
|
107
|
-
|
108
|
-
private
|
109
|
-
|
110
|
-
def one_hot_transform(label)
|
111
|
-
arr = Array.new(10) { 0.0 }
|
112
|
-
arr[label] = 1.0
|
113
|
-
arr
|
114
|
-
end
|
115
|
-
|
116
|
-
def check_magic(input_file, expected_magic)
|
117
|
-
actual_magic = read_magic(input_file)
|
118
|
-
unless actual_magic == expected_magic
|
119
|
-
raise InvalidMagic, "Expected #{expected_magic}, but #{actual_magic} is given"
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
def read_uint8(input_file, n=1)
|
124
|
-
input_file.read(n).unpack('C*')
|
125
|
-
end
|
126
|
-
|
127
|
-
def read_uint32(input_file, n=1)
|
128
|
-
input_file.read(4 * n).unpack('N*')
|
129
|
-
end
|
130
|
-
|
131
|
-
def read_magic(input_file)
|
132
|
-
read_uint32(input_file).first
|
133
|
-
end
|
134
|
-
|
135
|
-
def read_total_count(input_file)
|
136
|
-
read_uint32(input_file).first
|
137
|
-
end
|
138
|
-
|
139
|
-
def read_image_size(input_file)
|
140
|
-
read_uint32(input_file, 2)
|
141
|
-
end
|
142
|
-
|
143
|
-
alias read_labels read_uint8
|
144
|
-
|
145
|
-
def read_image(nrows, ncols)
|
146
|
-
input_images.read(nrows * ncols).unpack("C*")
|
147
|
-
end
|
148
|
-
|
149
|
-
def input_images
|
150
|
-
@input_images ||= File.open(filename_image)
|
151
|
-
end
|
152
|
-
|
153
|
-
def input_labels
|
154
|
-
@input_labels ||= File.open(filename_label)
|
155
|
-
end
|
156
|
-
end
|
157
|
-
|
158
|
-
def self.load_images(filename)
|
159
|
-
Loader.new(filename).load_images
|
160
|
-
end
|
161
|
-
|
162
|
-
def self.load_labels(filename)
|
163
|
-
Loader.new(filename).load_labels
|
164
|
-
end
|
165
|
-
|
166
|
-
def self.read_data_sets(path, one_hot: false)
|
167
|
-
unless Dir.exist?(path)
|
168
|
-
FileUtils.mkdir_p path
|
169
|
-
end
|
170
|
-
|
171
|
-
base_url = "yann.lecun.com"
|
172
|
-
filenames = [
|
173
|
-
"train-images-idx3-ubyte.gz",
|
174
|
-
"train-labels-idx1-ubyte.gz",
|
175
|
-
"t10k-images-idx3-ubyte.gz",
|
176
|
-
"t10k-labels-idx1-ubyte.gz"
|
177
|
-
]
|
178
|
-
Net::HTTP.start(base_url) do |http|
|
179
|
-
filenames.each do |name|
|
180
|
-
unless File.exists?(File.join(path, name))
|
181
|
-
f = File.open(File.join(path, name), "
|
182
|
-
begin
|
183
|
-
http.request_get('/exdb/mnist/' + name) do |resp|
|
184
|
-
resp.read_body do |segment|
|
185
|
-
f.write(segment)
|
186
|
-
end
|
187
|
-
end
|
188
|
-
ensure
|
189
|
-
f.close
|
190
|
-
end
|
191
|
-
end
|
192
|
-
end
|
193
|
-
end
|
194
|
-
|
195
|
-
filenames.each do |name|
|
196
|
-
next if File.exists?(File.join(path, File.basename(name, '.gz')))
|
197
|
-
puts "extracting #{name} ..."
|
198
|
-
Zlib::GzipReader.open(File.join(path, name)) do |zipfile|
|
199
|
-
outfile = File.open(File.join(path, File.basename(name, '.gz')), '
|
200
|
-
outfile.write(zipfile.read)
|
201
|
-
end
|
202
|
-
end
|
203
|
-
MnistReader.new(path, one_hot)
|
204
|
-
end
|
205
|
-
end
|
1
|
+
require "mnist-learn/version"
|
2
|
+
require 'fileutils'
|
3
|
+
require 'zlib'
|
4
|
+
require 'net/http'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
module Mnist
|
8
|
+
class Error < StandardError; end
|
9
|
+
|
10
|
+
class LoadError < Error; end
|
11
|
+
|
12
|
+
class InvalidMagic < LoadError; end
|
13
|
+
|
14
|
+
class MnistReader
|
15
|
+
def initialize(base_path, one_hot = false)
|
16
|
+
@base_path = base_path
|
17
|
+
@one_hot = one_hot
|
18
|
+
end
|
19
|
+
|
20
|
+
def train
|
21
|
+
load_pair('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
|
22
|
+
end
|
23
|
+
|
24
|
+
def test
|
25
|
+
load_pair('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
|
26
|
+
end
|
27
|
+
|
28
|
+
private
|
29
|
+
|
30
|
+
def load_pair(images, labels)
|
31
|
+
Loader.new(File.join(@base_path, images), File.join(@base_path, labels), @one_hot)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
class Loader
|
36
|
+
IMAGE_FILE_MAGIC = 2051
|
37
|
+
LABEL_FILE_MAGIC = 2049
|
38
|
+
|
39
|
+
def initialize(filename_image, filename_label, one_hot)
|
40
|
+
@filename_image = filename_image
|
41
|
+
@filename_label = filename_label
|
42
|
+
@one_hot = one_hot
|
43
|
+
@index = 0
|
44
|
+
end
|
45
|
+
|
46
|
+
attr_reader :filename_image, :filename_label
|
47
|
+
|
48
|
+
def load_images
|
49
|
+
check_magic(input_images, IMAGE_FILE_MAGIC)
|
50
|
+
@total_count = read_total_count(input_images)
|
51
|
+
nrows, ncols = read_image_size(input_images)
|
52
|
+
images = @total_count.times.map do
|
53
|
+
read_image(nrows, ncols)
|
54
|
+
end
|
55
|
+
[nrows, ncols, images]
|
56
|
+
end
|
57
|
+
|
58
|
+
def load_labels
|
59
|
+
check_magic(input_labels, LABEL_FILE_MAGIC)
|
60
|
+
@total_count = read_total_count(input_labels)
|
61
|
+
read_labels(input_labels, @total_count)
|
62
|
+
end
|
63
|
+
|
64
|
+
def images
|
65
|
+
@all_images ||= load_images[2]
|
66
|
+
end
|
67
|
+
|
68
|
+
def labels
|
69
|
+
@all_labels ||= (@one_hot ? load_labels.map { |label_data| one_hot_transform(label_data) } : load_labels)
|
70
|
+
end
|
71
|
+
|
72
|
+
def next(batch_size)
|
73
|
+
if @index == 0
|
74
|
+
@rows, @columns, @images = load_images
|
75
|
+
@labels = load_labels
|
76
|
+
end
|
77
|
+
images = []
|
78
|
+
labels = []
|
79
|
+
batch_size.times.each do
|
80
|
+
next if @index >= @total_count
|
81
|
+
image_data = @images[@index]
|
82
|
+
label_data = @labels[@index]
|
83
|
+
image_data.map! { |b| b.to_f / 255.0 }
|
84
|
+
@index += 1
|
85
|
+
images << image_data
|
86
|
+
labels << (@one_hot ? one_hot_transform(label_data) : label_data.to_f)
|
87
|
+
end
|
88
|
+
[images, labels]
|
89
|
+
end
|
90
|
+
|
91
|
+
def next_batch(batch_size, rnd: Random.new)
|
92
|
+
@data_set ||= begin
|
93
|
+
rows, columns, images = load_images
|
94
|
+
labels = load_labels
|
95
|
+
Array.new(images.size) do
|
96
|
+
image_data = images[@index]
|
97
|
+
label_data = labels[@index]
|
98
|
+
image_data.map! { |b| b.to_f / 255.0 }
|
99
|
+
@index += 1
|
100
|
+
[image_data, (@one_hot ? one_hot_transform(label_data) : label_data.to_f)]
|
101
|
+
end
|
102
|
+
end
|
103
|
+
@data_set.shuffle!(random: rnd)
|
104
|
+
batch = @data_set[0...batch_size]
|
105
|
+
[batch.map { |v| v[0]}, batch.map { |v| v[1]}]
|
106
|
+
end
|
107
|
+
|
108
|
+
private
|
109
|
+
|
110
|
+
def one_hot_transform(label)
|
111
|
+
arr = Array.new(10) { 0.0 }
|
112
|
+
arr[label] = 1.0
|
113
|
+
arr
|
114
|
+
end
|
115
|
+
|
116
|
+
def check_magic(input_file, expected_magic)
|
117
|
+
actual_magic = read_magic(input_file)
|
118
|
+
unless actual_magic == expected_magic
|
119
|
+
raise InvalidMagic, "Expected #{expected_magic}, but #{actual_magic} is given"
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
def read_uint8(input_file, n=1)
|
124
|
+
input_file.read(n).unpack('C*')
|
125
|
+
end
|
126
|
+
|
127
|
+
def read_uint32(input_file, n=1)
|
128
|
+
input_file.read(4 * n).unpack('N*')
|
129
|
+
end
|
130
|
+
|
131
|
+
def read_magic(input_file)
|
132
|
+
read_uint32(input_file).first
|
133
|
+
end
|
134
|
+
|
135
|
+
def read_total_count(input_file)
|
136
|
+
read_uint32(input_file).first
|
137
|
+
end
|
138
|
+
|
139
|
+
def read_image_size(input_file)
|
140
|
+
read_uint32(input_file, 2)
|
141
|
+
end
|
142
|
+
|
143
|
+
alias read_labels read_uint8
|
144
|
+
|
145
|
+
def read_image(nrows, ncols)
|
146
|
+
input_images.read(nrows * ncols).unpack("C*")
|
147
|
+
end
|
148
|
+
|
149
|
+
def input_images
|
150
|
+
@input_images ||= File.open(filename_image)
|
151
|
+
end
|
152
|
+
|
153
|
+
def input_labels
|
154
|
+
@input_labels ||= File.open(filename_label)
|
155
|
+
end
|
156
|
+
end
|
157
|
+
|
158
|
+
def self.load_images(filename)
|
159
|
+
Loader.new(filename).load_images
|
160
|
+
end
|
161
|
+
|
162
|
+
def self.load_labels(filename)
|
163
|
+
Loader.new(filename).load_labels
|
164
|
+
end
|
165
|
+
|
166
|
+
def self.read_data_sets(path, one_hot: false)
|
167
|
+
unless Dir.exist?(path)
|
168
|
+
FileUtils.mkdir_p path
|
169
|
+
end
|
170
|
+
|
171
|
+
base_url = "yann.lecun.com"
|
172
|
+
filenames = [
|
173
|
+
"train-images-idx3-ubyte.gz",
|
174
|
+
"train-labels-idx1-ubyte.gz",
|
175
|
+
"t10k-images-idx3-ubyte.gz",
|
176
|
+
"t10k-labels-idx1-ubyte.gz"
|
177
|
+
]
|
178
|
+
Net::HTTP.start(base_url) do |http|
|
179
|
+
filenames.each do |name|
|
180
|
+
unless File.exists?(File.join(path, name))
|
181
|
+
f = File.open(File.join(path, name), "wb")
|
182
|
+
begin
|
183
|
+
http.request_get('/exdb/mnist/' + name) do |resp|
|
184
|
+
resp.read_body do |segment|
|
185
|
+
f.write(segment)
|
186
|
+
end
|
187
|
+
end
|
188
|
+
ensure
|
189
|
+
f.close
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
194
|
+
|
195
|
+
filenames.each do |name|
|
196
|
+
next if File.exists?(File.join(path, File.basename(name, '.gz')))
|
197
|
+
puts "extracting #{name} ..."
|
198
|
+
Zlib::GzipReader.open(File.join(path, name)) do |zipfile|
|
199
|
+
outfile = File.open(File.join(path, File.basename(name, '.gz')), 'wb')
|
200
|
+
outfile.write(zipfile.read)
|
201
|
+
end
|
202
|
+
end
|
203
|
+
MnistReader.new(path, one_hot)
|
204
|
+
end
|
205
|
+
end
|
data/lib/mnist-learn/version.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
module Mnist
|
2
|
-
VERSION = "0.1.2"
|
3
|
-
end
|
1
|
+
module Mnist
|
2
|
+
VERSION = "0.1.3"
|
3
|
+
end
|
data/mnist-learn.gemspec
CHANGED
@@ -1,27 +1,27 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
3
|
-
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require 'mnist-learn/version'
|
5
|
-
|
6
|
-
Gem::Specification.new do |spec|
|
7
|
-
spec.name = "mnist-learn"
|
8
|
-
spec.version = Mnist::VERSION
|
9
|
-
spec.authors = ["Joseph Dayo"]
|
10
|
-
spec.email = ["joseph.dayo@gmail.com"]
|
11
|
-
|
12
|
-
spec.summary = %q{Utilities for MNIST handwritten digits data}
|
13
|
-
spec.description = %q{Utilities for MNIST handwritten digits data}
|
14
|
-
spec.homepage = "https://github.com/jedld/ruby-mnist"
|
15
|
-
spec.license = "MIT"
|
16
|
-
|
17
|
-
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
-
spec.bindir = "exe"
|
19
|
-
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
|
-
spec.require_paths = ["lib"]
|
21
|
-
|
22
|
-
spec.add_development_dependency "bundler"
|
23
|
-
spec.add_development_dependency "rake", "~> 10.0"
|
24
|
-
spec.add_development_dependency "rspec"
|
25
|
-
spec.add_development_dependency "pry-byebug"
|
26
|
-
spec.add_development_dependency "awesome_print"
|
27
|
-
end
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'mnist-learn/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.name = "mnist-learn"
|
8
|
+
spec.version = Mnist::VERSION
|
9
|
+
spec.authors = ["Joseph Dayo"]
|
10
|
+
spec.email = ["joseph.dayo@gmail.com"]
|
11
|
+
|
12
|
+
spec.summary = %q{Utilities for MNIST handwritten digits data}
|
13
|
+
spec.description = %q{Utilities for MNIST handwritten digits data}
|
14
|
+
spec.homepage = "https://github.com/jedld/ruby-mnist"
|
15
|
+
spec.license = "MIT"
|
16
|
+
|
17
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
18
|
+
spec.bindir = "exe"
|
19
|
+
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
20
|
+
spec.require_paths = ["lib"]
|
21
|
+
|
22
|
+
spec.add_development_dependency "bundler"
|
23
|
+
spec.add_development_dependency "rake", "~> 10.0"
|
24
|
+
spec.add_development_dependency "rspec"
|
25
|
+
spec.add_development_dependency "pry-byebug"
|
26
|
+
spec.add_development_dependency "awesome_print"
|
27
|
+
end
|
data/script/console
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require "bundler/setup"
|
4
|
-
require "mnist"
|
5
|
-
|
6
|
-
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
-
# with your gem easier. You can also use a different console, if you like.
|
8
|
-
|
9
|
-
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
-
# require "pry"
|
11
|
-
# Pry.start
|
12
|
-
|
13
|
-
require "irb"
|
14
|
-
IRB.start
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "bundler/setup"
|
4
|
+
require "mnist"
|
5
|
+
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
8
|
+
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
10
|
+
# require "pry"
|
11
|
+
# Pry.start
|
12
|
+
|
13
|
+
require "irb"
|
14
|
+
IRB.start
|
data/script/setup
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
#!/bin/bash
|
2
|
-
set -euo pipefail
|
3
|
-
IFS=$'\n\t'
|
4
|
-
|
5
|
-
bundle install
|
6
|
-
|
7
|
-
# Do any other automated setup that you need to do here
|
1
|
+
#!/bin/bash
|
2
|
+
set -euo pipefail
|
3
|
+
IFS=$'\n\t'
|
4
|
+
|
5
|
+
bundle install
|
6
|
+
|
7
|
+
# Do any other automated setup that you need to do here
|
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mnist-learn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Joseph Dayo
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-04-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
26
|
+
version: '0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rake
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -89,7 +89,6 @@ extensions: []
|
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
91
|
- ".gitignore"
|
92
|
-
- ".rspec_status"
|
93
92
|
- CODE_OF_CONDUCT.md
|
94
93
|
- Gemfile
|
95
94
|
- LICENSE.txt
|
@@ -122,7 +121,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
122
121
|
version: '0'
|
123
122
|
requirements: []
|
124
123
|
rubyforge_project:
|
125
|
-
rubygems_version: 2.
|
124
|
+
rubygems_version: 2.7.6.2
|
126
125
|
signing_key:
|
127
126
|
specification_version: 4
|
128
127
|
summary: Utilities for MNIST handwritten digits data
|