mnist-learn 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: ae87b3ff341c6db37f9d503da3c135a850546852
4
+ data.tar.gz: 78897ffc8dec031485af30fd87f2f7a71fc47a09
5
+ SHA512:
6
+ metadata.gz: 4aafa7cd4149b0014548247472eedc81443660d674f4861b7a3824c9d7b04fcd72b4bd7f331dc514d7e95196ec056c5d8c0b2d64095d5fc04d693e05f935ffc7
7
+ data.tar.gz: feaa0c636fa9d0be6ac078bb10431966174242477d77390a78344b588eb8b727ef87fc36342ff9c4db61bed0601b86d8a5d78a14695c82f4826468caac802a7a
data/.gitignore ADDED
@@ -0,0 +1,11 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /spec/fixtures/data
10
+ /tmp/
11
+ /data
data/.rspec_status ADDED
@@ -0,0 +1,4 @@
1
+ example_id | status | run_time |
2
+ --------------------------- | ------ | ------------ |
3
+ ./spec/mnist_spec.rb[1:1:1] | passed | 6.54 seconds |
4
+ ./spec/mnist_spec.rb[1:1:2] | failed | 2.62 seconds |
@@ -0,0 +1,13 @@
1
+ # Contributor Code of Conduct
2
+
3
+ As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.
4
+
5
+ We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, age, or religion.
6
+
7
+ Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.
8
+
9
+ Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.
10
+
11
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.
12
+
13
+ This Code of Conduct is adapted from the [Contributor Covenant](http://contributor-covenant.org), version 1.0.0, available at [http://contributor-covenant.org/version/1/0/0/](http://contributor-covenant.org/version/1/0/0/)
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in mnist-learn.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2015 Kenta Murata
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,52 @@
1
+ # Mnist
2
+
3
+ Utility Ruby gem for easily loading and parsing the MNIST database of handwritten digits for machine learning purposes.
4
+
5
+ Based on work by Kenta Murata.
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ ```ruby
12
+ gem 'mnist-learn'
13
+ ```
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install mnist-learn
22
+
23
+ ## Usage
24
+
25
+ Load test and training data easily.
26
+
27
+ ```ruby
28
+ require 'mnist-learn'
29
+
30
+ mnist = Mnist.read_data_sets('data', one_hot: false) # automatically downloads the test and training archives and stores them in ./data
31
+ images = mnist.train.images
32
+ labels = mnist.train.labels
33
+
34
+ # You can also iterate in batches
35
+
36
+ train_images, train_labels = mnist.train.next_batch(100)
37
+ test_images, test_labels = mnist.test.next_batch(100)
38
+ ```
39
+
40
+ ## Development
41
+
42
+ After checking out the repo, run `script/setup` to install dependencies. Then, run `script/console` for an interactive prompt that will allow you to experiment.
43
+
44
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release` to create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
45
+
46
+ ## Contributing
47
+
48
+ 1. Fork it ( https://github.com/[my-github-username]/mnist-learn/fork )
49
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
50
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
51
+ 4. Push to the branch (`git push origin my-new-feature`)
52
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/exe/mnist2csv ADDED
@@ -0,0 +1,45 @@
1
#!/usr/bin/env ruby

# Converts an MNIST image file and its matching label file to CSV on stdout.
#
# The first row is a header ("y,x0,x1,..."); each following row is one sample:
# the label followed by the pixel values of the image. Rows end with CRLF.
#
# Usage: mnist2csv [options] IMAGE_FILE LABEL_FILE
#   -n, --sample=N   emit only N randomly chosen samples

require 'bundler/setup'
# The gem ships lib/mnist-learn.rb; there is no lib/mnist.rb to require.
require 'mnist-learn'

require 'optparse'

num_sample = nil

opt = OptionParser.new
opt.banner = "Usage: #{$0} [options] IMAGE_FILE LABEL_FILE"
opt.version = Mnist::VERSION
opt.separator ''
opt.separator 'Options:'
opt.on('-n', '--sample=N', Integer, 'The number of samples') { |v| num_sample = v }
opt.parse!(ARGV)

# Both positional arguments are mandatory; show the usage text instead of
# failing later with an obscure error about a nil filename.
if ARGV.length < 2
  warn opt.help
  exit 1
end

nrows, ncols, images = Mnist.load_images(ARGV[0])
labels = Mnist.load_labels(ARGV[1])

if images.length != labels.length
  raise "The lengths of images and labels are not same"
end

header = ['y'] + Array.new(nrows * ncols) { |i| "x#{i}" }
print header.join(','), "\r\n"

indices = (0...labels.length).to_a
indices = indices.sample(num_sample) if num_sample

indices.each do |index|
  image = images[index]
  # The loader yields each image as an Array of byte values; raw byte strings
  # are still accepted and unpacked for compatibility.
  pixels = image.respond_to?(:unpack) ? image.unpack('C*') : image
  print [labels[index], *pixels].join(','), "\r\n"
end
@@ -0,0 +1,3 @@
1
module Mnist
  # Version of the mnist-learn gem. Frozen so the shared constant cannot be
  # mutated by accident.
  VERSION = "0.1.0".freeze
end
@@ -0,0 +1,188 @@
1
+ require "mnist-learn/version"
2
+ require 'fileutils'
3
+ require 'zlib'
4
+ require 'net/http'
5
+ require 'ostruct'
6
+
7
# Utilities for downloading and parsing the MNIST handwritten digit files
# (IDX format: big-endian uint32 header fields followed by unsigned bytes).
module Mnist
  # Base class for every error raised by this library.
  class Error < StandardError; end

  # Raised when an MNIST file cannot be downloaded or parsed.
  class LoadError < Error; end

  # Raised when a file does not start with the expected IDX magic number.
  class InvalidMagic < LoadError; end

  # Entry point returned by Mnist.read_data_sets: exposes the training and
  # test sets stored under a directory.
  class MnistReader
    # @param base_path [String] directory holding the extracted IDX files
    # @param one_hot [Boolean] whether batches return one-hot encoded labels
    def initialize(base_path, one_hot = false)
      @base_path = base_path
      @one_hot = one_hot
    end

    # @return [Loader] loader for the training set. Memoized so that repeated
    #   `reader.train.next_batch(n)` calls walk through the data instead of
    #   restarting from the first sample every time.
    def train
      @train ||= load_pair('train-images-idx3-ubyte', 'train-labels-idx1-ubyte')
    end

    # @return [Loader] loader for the test set (memoized, see #train).
    def test
      @test ||= load_pair('t10k-images-idx3-ubyte', 't10k-labels-idx1-ubyte')
    end

    private

    def load_pair(images, labels)
      Loader.new(File.join(@base_path, images), File.join(@base_path, labels), @one_hot)
    end
  end

  # Reads one image file and/or one label file and serves their contents
  # either whole (#images, #labels) or in sequential batches (#next_batch).
  class Loader
    IMAGE_FILE_MAGIC = 2051
    LABEL_FILE_MAGIC = 2049

    attr_reader :filename_image, :filename_label

    # @param filename_image [String, nil] path of the image file
    # @param filename_label [String, nil] path of the label file
    # @param one_hot [Boolean] whether #next_batch one-hot encodes labels
    def initialize(filename_image, filename_label = nil, one_hot = false)
      @filename_image = filename_image
      @filename_label = filename_label
      @one_hot = one_hot
      @index = 0
    end

    # Parses the image file from the beginning. The file is opened in binary
    # mode and closed before returning, so the method can be called repeatedly.
    #
    # @return [Array(Integer, Integer, Array<Array<Integer>>)] row count,
    #   column count, and one array of byte values (0..255) per image
    # @raise [InvalidMagic] if the magic number is not IMAGE_FILE_MAGIC
    # @raise [LoadError] if the file ends before all announced data is read
    def load_images
      File.open(filename_image, 'rb') do |input|
        check_magic(input, IMAGE_FILE_MAGIC)
        @total_count = read_total_count(input)
        nrows, ncols = read_image_size(input)
        images = Array.new(@total_count) { read_image(input, nrows, ncols) }
        [nrows, ncols, images]
      end
    end

    # Parses the label file from the beginning (binary mode, closed on return).
    #
    # @return [Array<Integer>] one label per sample
    # @raise [InvalidMagic] if the magic number is not LABEL_FILE_MAGIC
    # @raise [LoadError] if the file ends before all announced data is read
    def load_labels
      File.open(filename_label, 'rb') do |input|
        check_magic(input, LABEL_FILE_MAGIC)
        @total_count = read_total_count(input)
        read_labels(input, @total_count)
      end
    end

    # @return [Array<Array<Integer>>] all images as raw byte values (cached)
    def images
      @all_images ||= load_images[2]
    end

    # @return [Array<Integer>] all labels (cached)
    def labels
      @all_labels ||= load_labels
    end

    # Returns the next `batch_size` samples and advances the internal cursor.
    # Fewer samples (possibly none) are returned once the data is exhausted.
    #
    # @param batch_size [Integer] maximum number of samples to return
    # @return [Array(Array<Array<Float>>, Array)] images with pixels scaled to
    #   0.0..1.0, and labels as Floats or, when one_hot is set, as 10-element
    #   one-hot Float arrays
    def next_batch(batch_size)
      all_images = images
      all_labels = labels
      total = [all_images.length, all_labels.length].min
      count = [[batch_size, total - @index].min, 0].max
      start = @index
      @index += count

      # Build new arrays rather than normalising in place, so the cached raw
      # data returned by #images is never altered.
      batch_images = all_images[start, count].map do |pixels|
        pixels.map { |value| value.to_f / 255.0 }
      end
      batch_labels = all_labels[start, count].map do |label|
        @one_hot ? one_hot_transform(label) : label.to_f
      end
      [batch_images, batch_labels]
    end

    private

    # Encodes a digit label as a 10-element array with 1.0 at index `label`.
    def one_hot_transform(label)
      encoded = Array.new(10, 0.0)
      encoded[label] = 1.0
      encoded
    end

    def check_magic(input_file, expected_magic)
      actual_magic = read_magic(input_file)
      unless actual_magic == expected_magic
        raise InvalidMagic, "Expected #{expected_magic}, but #{actual_magic} is given"
      end
    end

    # Reads exactly `size` bytes, raising instead of silently returning nil or
    # a short string when the file is truncated.
    def read_bytes(input_file, size)
      data = input_file.read(size)
      if data.nil? || data.bytesize < size
        raise LoadError, "Unexpected end of file: wanted #{size} bytes, got #{data ? data.bytesize : 0}"
      end
      data
    end

    def read_uint8(input_file, n = 1)
      read_bytes(input_file, n).unpack('C*')
    end

    def read_uint32(input_file, n = 1)
      read_bytes(input_file, 4 * n).unpack('N*')
    end

    def read_magic(input_file)
      read_uint32(input_file).first
    end

    def read_total_count(input_file)
      read_uint32(input_file).first
    end

    def read_image_size(input_file)
      read_uint32(input_file, 2)
    end

    alias read_labels read_uint8

    def read_image(input_file, nrows, ncols)
      read_uint8(input_file, nrows * ncols)
    end
  end

  # Loads a single image file.
  #
  # @param filename [String] path of an IDX image file
  # @return [Array] see Loader#load_images
  def self.load_images(filename)
    Loader.new(filename).load_images
  end

  # Loads a single label file.
  #
  # @param filename [String] path of an IDX label file
  # @return [Array<Integer>] see Loader#load_labels
  def self.load_labels(filename)
    Loader.new(nil, filename).load_labels
  end

  # Makes sure the four MNIST files are present (downloaded and extracted)
  # under `path` and returns a reader for them.
  #
  # Archives are downloaded only when neither the archive nor its extracted
  # file is already present, so a fully populated directory needs no network.
  #
  # @param path [String] directory used to store the data files
  # @param one_hot [Boolean] whether batches return one-hot encoded labels
  # @return [MnistReader]
  # @raise [LoadError] if a download does not succeed
  def self.read_data_sets(path, one_hot: false)
    FileUtils.mkdir_p(path)

    base_url = "yann.lecun.com"
    filenames = [
      "train-images-idx3-ubyte.gz",
      "train-labels-idx1-ubyte.gz",
      "t10k-images-idx3-ubyte.gz",
      "t10k-labels-idx1-ubyte.gz"
    ]

    missing = filenames.reject do |name|
      File.exist?(File.join(path, name)) ||
        File.exist?(File.join(path, File.basename(name, '.gz')))
    end

    unless missing.empty?
      Net::HTTP.start(base_url) do |http|
        missing.each do |name|
          download_archive(http, '/exdb/mnist/' + name, File.join(path, name))
        end
      end
    end

    filenames.each do |name|
      target = File.join(path, File.basename(name, '.gz'))
      next if File.exist?(target)
      puts "extracting #{name} ..."
      Zlib::GzipReader.open(File.join(path, name)) do |zipfile|
        # Block form closes the output; binary mode keeps the bytes intact.
        File.open(target, 'wb') { |outfile| IO.copy_stream(zipfile, outfile) }
      end
    end

    MnistReader.new(path, one_hot)
  end

  # Streams `remote_path` into `destination`. The body is written to a
  # temporary ".part" file that is renamed only after a successful download,
  # so an interrupted or failed request never leaves a file that later runs
  # would mistake for a complete archive.
  def self.download_archive(http, remote_path, destination)
    partial = "#{destination}.part"
    File.open(partial, 'wb') do |out|
      http.request_get(remote_path) do |resp|
        unless resp.is_a?(Net::HTTPSuccess)
          raise LoadError, "Failed to download #{remote_path}: #{resp.code} #{resp.message}"
        end
        resp.read_body { |segment| out.write(segment) }
      end
    end
    File.rename(partial, destination)
  ensure
    FileUtils.rm_f(partial) if partial
  end
  private_class_method :download_archive
end
@@ -0,0 +1,27 @@
1
# coding: utf-8
# Gem specification for mnist-learn.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'mnist-learn/version'

Gem::Specification.new do |spec|
  spec.name          = "mnist-learn"
  spec.version       = Mnist::VERSION
  spec.authors       = ["Joseph Dayo"]
  spec.email         = ["joseph.dayo@gmail.com"]

  spec.summary       = %q{Utilities for MNIST handwritten digits data}
  spec.description   = %q{Utilities for MNIST handwritten digits data}
  spec.homepage      = "https://github.com/jedld/ruby-mnist"
  spec.license       = "MIT"

  # Package every tracked file except tests and dotfiles: development
  # artifacts such as .rspec_status and .gitignore do not belong in the gem.
  spec.files         = `git ls-files -z`.split("\x0").reject do |f|
    f.match(%r{^(test|spec|features)/}) || f.start_with?('.')
  end
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  # ">= 1.9" rather than "~> 1.9" so the gem can also be developed with
  # Bundler 2.x, which the pessimistic constraint would reject.
  spec.add_development_dependency "bundler", ">= 1.9"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
  spec.add_development_dependency "pry-byebug"
  spec.add_development_dependency "awesome_print"
end
data/script/console ADDED
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby

# Starts an IRB session with the gem loaded, for interactive experimentation.

require "bundler/setup"
# The gem's entry file is lib/mnist-learn.rb; requiring "mnist" would fail.
require "mnist-learn"

# You can add fixtures and/or initialization code here to make experimenting
# with your gem easier. You can also use a different console, if you like.

# (If you use this, don't forget to add pry to your Gemfile!)
# require "pry"
# Pry.start

require "irb"
IRB.start
data/script/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mnist-learn
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Joseph Dayo
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2018-05-28 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.9'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.9'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry-byebug
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: awesome_print
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ description: Utilities for MNIST handwritten digits data
84
+ email:
85
+ - joseph.dayo@gmail.com
86
+ executables:
87
+ - mnist2csv
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".gitignore"
92
+ - ".rspec_status"
93
+ - CODE_OF_CONDUCT.md
94
+ - Gemfile
95
+ - LICENSE.txt
96
+ - README.md
97
+ - Rakefile
98
+ - exe/mnist2csv
99
+ - lib/mnist-learn.rb
100
+ - lib/mnist-learn/version.rb
101
+ - mnist-learn.gemspec
102
+ - script/console
103
+ - script/setup
104
+ homepage: https://github.com/jedld/ruby-mnist
105
+ licenses:
106
+ - MIT
107
+ metadata: {}
108
+ post_install_message:
109
+ rdoc_options: []
110
+ require_paths:
111
+ - lib
112
+ required_ruby_version: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ required_rubygems_version: !ruby/object:Gem::Requirement
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ requirements: []
123
+ rubyforge_project:
124
+ rubygems_version: 2.6.8
125
+ signing_key:
126
+ specification_version: 4
127
+ summary: Utilities for MNIST handwritten digits data
128
+ test_files: []