ruby_brain 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,82 @@
1
require 'ruby_brain'
require 'ruby_brain/dataset/mnist/data'

# Number of MNIST samples reserved for training and for evaluation.
NUM_TRAIN_DATA = 5000
NUM_TEST_DATA = 500

# Loads (downloading on first use) the MNIST training set as
# {input: [[Float]], output: [[one-hot Integer]]}.
dataset = RubyBrain::DataSet::Mnist::data

training_input = dataset[:input][0..(NUM_TRAIN_DATA-1)]
training_supervisor = dataset[:output][0..(NUM_TRAIN_DATA-1)]

# test_input = dataset[:input][NUM_TRAIN_DATA..(NUM_TRAIN_DATA+NUM_TEST_DATA-1)]
# test_supervisor = dataset[:output][NUM_TRAIN_DATA..(NUM_TRAIN_DATA+NUM_TEST_DATA-1)]
# NOTE(review): everything after the training slice is used for testing,
# so NUM_TEST_DATA is currently unused.
test_input = dataset[:input][NUM_TRAIN_DATA..-1]
test_supervisor = dataset[:output][NUM_TRAIN_DATA..-1]

# Three-layer network: one input unit per pixel, 50 hidden units,
# one output unit per digit class.
network = RubyBrain::Network.new([dataset[:input].first.size, 50, dataset[:output].first.size])
# network.learning_rate = 0.7
network.init_network
# Pre-trained weights shipped next to this example.
network.load_weights_from_yaml_file(File.dirname(__FILE__) + '/../best_weights_1469044985.yml')

### You can initialize weights by loading them from a file if you want.
# network.load_weights_from_yaml_file('path/to/weights.yml.file')

# network.learn(training_input, training_supervisor, max_training_count=100, tolerance=0.0004, monitoring_channels=[:best_params_training])

### You can save weights into a yml file if you want.
# network.dump_weights_to_yaml('path/to/weights.yml.file')
31
# Adds Array#argmax for picking the winning class out of a network's
# output vector.
class Array
  # Returns the index of the largest element. Ties are resolved in
  # favour of the earliest occurrence.
  def argmax
    best_index = 0
    (1...size).each do |i|
      best_index = i if self[i] > self[best_index]
    end
    best_index
  end
end
44
+
45
# Renders each test image as ASCII art, prints the expected and predicted
# digits, and collects per-sample correctness into `results`.
results = []
test_input.each_with_index do |input, i|
  ### You can see test input, label and predicted label in standard out in this block

  # 28x28 ASCII rendering: 'x' for bright pixels (values are 0..1 scaled).
  input.each_with_index do |e, j|
    print(e > 0.3 ? 'x' : ' ')
    # Break the line after every complete 28-pixel row. The original
    # `(j % 28) == 0` fired after the first pixel, shifting every row by one.
    puts if (j % 28) == 27
  end
  puts
  supervisor_label = test_supervisor[i].argmax
  predicted_label = network.get_forward_outputs(test_input[i]).argmax
  puts "test_supervisor: #{supervisor_label}"
  puts "predicate: #{predicted_label}"
  results << (supervisor_label == predicted_label)
  puts "------------------------------------------------------------"
end

# Fraction of test samples classified correctly.
puts "accuracy: #{results.count(true).to_f/results.size}"

### you can do above procedure simply by using Trainer

# training_option = {
#   learning_rate: 0.5,
#   max_training_count: 50,
#   tolerance: 0.0004,
#   # initial_weights_file: 'weights_3_30_10_1429166740.yml',
#   # initial_weights_file: 'best_weights_1429544001.yml',
#   monitoring_channels: [:best_params_training]
# }

# RubyBrain::Trainer.normal_learning([dataset[:input].first.size, 50, dataset[:output].first.size],
#                                    training_input, training_supervisor,
#                                    training_option)
data/lib/ruby_brain.rb ADDED
@@ -0,0 +1,19 @@
1
require "ruby_brain/version"
require 'forwardable'
require 'yaml'
require 'pp'

# Top-level namespace of the ruby_brain neural-network library.
# Loading this file pulls in every component of the gem.
module RubyBrain
  require "ruby_brain/nodes"
  require "ruby_brain/layer"
  require "ruby_brain/weights"
  require "ruby_brain/network"
  require "ruby_brain/trainer"
  require 'ruby_brain/exception'
  require 'ruby_brain/training_data_manipulator'

  # Pre-declare the sub-namespaces so they exist as constants even if the
  # files above only reopen them.
  module Nodes end
  module Trainer end
  module Exception end
  module DataSet end
end
@@ -0,0 +1,62 @@
1
# Downloads and decodes the MNIST handwritten-digit training set into the
# plain-Array format consumed by RubyBrain::Network.
module RubyBrain::DataSet::Mnist

  require 'mnist'
  require 'open-uri'

  # Fetches +target_url+ and writes the raw bytes to +dest_path+.
  #
  # Uses URI.open explicitly: Kernel#open's URL handling (the legacy
  # open-uri monkey patch) was deprecated in Ruby 2.7 and removed in
  # Ruby 3.0, so the old `open(url)` form breaks on modern Rubies.
  def download_file(target_url, dest_path)
    File.open(dest_path, "wb") do |saved_file|
      URI.open(target_url, "rb") do |read_file|
        saved_file.write(read_file.read)
      end
    end
  end

  # Returns the MNIST training set as
  #   { input:  Array of 784-element Arrays of Floats in 0.0..1.0,
  #     output: Array of 10-element one-hot Arrays (Integer 0/1) }
  #
  # The gzipped idx archives are downloaded into the current working
  # directory on first use and reused on subsequent calls.
  def data
    train_images_path = Dir.pwd + '/train-images-idx3-ubyte.gz'
    train_labels_path = Dir.pwd + '/train-labels-idx1-ubyte.gz'

    unless File.exist?(train_images_path)
      puts 'downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz ...'
      download_file('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', train_images_path)
    end

    unless File.exist?(train_labels_path)
      puts 'downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
      download_file('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', train_labels_path)
    end

    train_images = Mnist.load_images(train_images_path)
    train_labels = Mnist.load_labels(train_labels_path)

    # NOTE(review): Mnist.load_images appears to return a 3-element array
    # whose third element holds the packed per-image pixel strings —
    # confirm against the mnist gem before changing this index.
    # Each pixel byte (0..255) is scaled to a Float in 0.0..1.0.
    input_training_set = train_images[2].map do |image|
      image.unpack('C*').map { |e| e / 255.0 }
    end

    # Encode each label 0..9 as a 10-element one-hot vector.
    output_training_set = train_labels.map do |label|
      one_hot_vector = Array.new(10, 0)
      one_hot_vector[label] = 1
      one_hot_vector
    end

    {input: input_training_set, output: output_training_set}
  end

  module_function :data, :download_file
end
@@ -0,0 +1,28 @@
1
require 'mnist'


# Manual sanity check for the mnist gem and the downloaded archives.
# Expects train-images-idx3-ubyte.gz / train-labels-idx1-ubyte.gz in the
# current working directory.
train_images = Mnist.load_images('./train-images-idx3-ubyte.gz')
train_labels = Mnist.load_labels('./train-labels-idx1-ubyte.gz')

# NOTE(review): load_images appears to return a 3-element array whose
# third element holds 60000 packed 784-byte image strings — confirm
# against the mnist gem.
puts train_images[0].class
puts train_images[1].class
puts train_images[2].size
puts train_images[2][0].size
puts train_images[2][59999][783].class
puts train_images[2][59999].class
puts "------------------------------"

# Render the first 10 digits as ASCII art together with their labels.
10.times do |j|
  train_images[2][j].unpack('C*').each_with_index do |e, i|
    print(e > 50 ? 'x' : ' ')
    puts if (i % 28) == 0
  end
  puts
  puts train_labels[j]
end
@@ -0,0 +1,17 @@
1
+
2
+
3
module RubyBrain
  # Error hierarchy for the library. Every error raised by RubyBrain
  # descends from RubyBrainError, so callers can rescue that one class
  # as a catch-all.
  module Exception

    # Root of the library's error hierarchy.
    class RubyBrainError < StandardError; end

    # Raised when data fed to a network has incompatible dimensions.
    class DataDimensionError < RubyBrainError; end

    # Raised when training inputs and supervisor outputs do not line up.
    class TrainingDataError < DataDimensionError; end

  end
end
@@ -0,0 +1,37 @@
1
module RubyBrain
  # One layer of a network: an ordered list of nodes plus references to
  # the weight sets sitting on its input and output sides.
  class Layer
    attr_accessor :input_weights, :output_weights
    attr_reader :next_node_order_index, :nodes

    def initialize
      @nodes = []
      @next_node_order_index = 0
    end

    # Registers +node+ at the next position in this layer and wires it to
    # the layer's current input/output weight sets. Note the wiring uses
    # whatever weights are assigned at append time.
    def append(node)
      node.order_index = @next_node_order_index
      node.left_side_weights = @input_weights
      node.right_side_weights = @output_weights
      @nodes << node
      @next_node_order_index += 1
    end

    # Number of nodes currently held by this layer.
    def num_nodes
      @nodes.size
    end

    # Yields every node in order.
    def each_node
      @nodes.each { |a_node| yield a_node }
    end

    # Forward pass: collects each node's forward output for +inputs+.
    def forward_outputs(inputs=[])
      @nodes.map { |a_node| a_node.output_of_forward_calc(inputs) }
    end

    # Backward pass: collects each node's backward output for +inputs+,
    # dropping nodes that produce nil (e.g. bias nodes).
    def backward_outputs(inputs)
      @nodes.map { |a_node| a_node.output_of_backward_calc(inputs) }.compact
    end
  end
end
@@ -0,0 +1,252 @@
1
module RubyBrain
  # A fully-connected feed-forward neural network trained by per-sample
  # (stochastic) gradient descent with backpropagation.
  #
  # Topology is given as an Array of unit counts per layer, e.g.
  # [784, 50, 10]. All weights live in a WeightContainer (@weights_set);
  # layers and nodes only hold references into it.
  class Network
    extend Forwardable
    # Weight access is forwarded straight to the weight container.
    def_delegators :@weights_set, :overwrite_weights, :get_weights_as_array

    # SGD step size. Must be assigned before training; there is no default.
    attr_accessor :learning_rate

    # num_units_list: Array of Integer unit counts, input layer first,
    # output layer last.
    def initialize(num_units_list)
      @layers = []
      @num_units_list = num_units_list
      @weights_set = WeightContainer.new(@num_units_list)
    end

    # Loads weights from +weights_set_source+ and rebuilds the node graph
    # so the new weights are wired into the layers.
    def load_weights_from(weights_set_source)
      @weights_set.load_from(weights_set_source)
      init_network
    end

    # def overwrite_weights(weights_set_source)
    #   @weights_set.overwrite_weights(weights_set_source)
    # end

    # (Re)builds the layer/node graph from @num_units_list:
    # - input layer: ConstNodes, one per input plus one bias node (+1)
    # - hidden layers: Neurons plus one trailing bias ConstNode
    # - output layer: Neurons only, no bias node
    def init_network
      @layers = []
      layer = Layer.new
      (@num_units_list[0] + 1).times do
        layer.append Nodes::ConstNode.new
        # NOTE(review): output_weights is assigned *after* the first
        # append, so the first input node is wired with nil right-side
        # weights at append time — confirm ConstNode ignores it.
        layer.output_weights = @weights_set.weights_of_order(0)
      end
      @layers << layer

      # Hidden layers sit between weight sets i (input side) and i+1.
      @num_units_list[1..-2].each_with_index do |num_units, i|
        layer = Layer.new
        layer.input_weights = @weights_set.weights_of_order(i)
        layer.output_weights = @weights_set.weights_of_order(i+1)
        (num_units).times do
          layer.append Nodes::Neuron.new
        end
        layer.append Nodes::ConstNode.new
        @layers << layer
      end

      # Output layer: fed by the last weight set, no outgoing weights.
      layer = Layer.new
      layer.input_weights = @weights_set.weights_of_order(@num_units_list.size - 2)
      @num_units_list[-1].times do
        layer.append Nodes::Neuron.new
      end
      @layers << layer
    end

    # def get_weights_as_array
    #   @weights_set.get_weights_as_array
    # end


    # Runs a forward pass for one sample and returns the output layer's
    # activations as an Array. +inputs+ must have one value per input unit.
    def get_forward_outputs(inputs)
      inputs.each_with_index do |input, i|
        @layers.first.nodes[i].value = input
      end

      a_layer_outputs = nil
      # Seed with the input layer's outputs, then propagate layer by layer.
      a_layer_inputs = @layers.first.forward_outputs
      @layers.each do |layer|
        a_layer_outputs = layer.forward_outputs(a_layer_inputs)
        a_layer_inputs = a_layer_outputs
      end
      a_layer_outputs
    end

    # Propagates output-layer error terms backwards through every layer
    # except the input layer, letting each node cache its delta for the
    # subsequent weight update.
    def run_backpropagate(backward_inputs)
      a_layer_outputs = nil
      a_layer_inputs = backward_inputs
      @layers.reverse[0..-2].each do |layer|
        a_layer_outputs = layer.backward_outputs(a_layer_inputs)
        a_layer_inputs = a_layer_outputs
      end
      a_layer_outputs
    end

    # Gradient-descent step over every weight:
    #   w -= learning_rate * (left node's forward output) * (right node's delta)
    # Relies on the values cached by the preceding forward/backward passes.
    def update_weights
      @weights_set.each_weights_with_index do |weights, i|
        weights.each_with_index do |wl, j|
          wl.each_with_index do |w, k|
            wl[k] = w - (@learning_rate * @layers[i].nodes[j].this_output * @layers[i+1].nodes[k].this_backward_output)
          end
        end
      end
    end

    # Same update rule as #update_weights but restricted to the weight set
    # at +layer_index+ (negative indices count from the end).
    def update_weights_of_layer(layer_index)
      layer_index = @weights_set.num_sets + layer_index if layer_index < 0
      @weights_set.each_weights_with_index do |weights, i|
        next if i != layer_index
        weights.each_with_index do |wl, j|
          wl.each_with_index do |w, k|
            wl[k] = w - (@learning_rate * @layers[i].nodes[j].this_output * @layers[i+1].nodes[k].this_backward_output)
          end
        end
      end
    end

    # def calculate_rms_error(training_inputs_set, training_outputs_set)
    #   accumulated_errors = 0.0
    #   training_inputs_set.zip(training_outputs_set).each do |t_input, t_output|
    #     forward_outputs = get_forward_outputs(t_input)
    #     total_error_of_output_nodes = 0.0
    #     forward_outputs.zip(t_output).each do |o, t|
    #       total_error_of_output_nodes += (o - t)**2 / 2.0
    #     end
    #     accumulated_errors += total_error_of_output_nodes / forward_outputs.size
    #   end
    #   Math.sqrt(2.0 * accumulated_errors / training_inputs_set.size)
    # end

    # Trains the network by per-sample SGD.
    #
    # inputs_set / outputs_set:: parallel Arrays of samples and targets.
    # max_training_count::       maximum number of epochs.
    # tolerance::                stop early once RMS error drops to this.
    # monitoring_channels::      include :best_params_training to dump the
    #                            best weights seen to a timestamped YAML file.
    #
    # Raises RubyBrain::Exception::TrainingDataError when the two sets
    # have different sizes.
    def learn(inputs_set, outputs_set, max_training_count=50, tolerance=0.0, monitoring_channels=[])
      raise RubyBrain::Exception::TrainingDataError if inputs_set.size != outputs_set.size
      # raise "inputs_set and outputs_set has different size!!!!" if inputs_set.size != outputs_set.size

      # Sentinel "worst possible" error; any real epoch beats it.
      best_error = 9999999999999
      best_weights_array = []
      max_training_count.times do |i_training|
        accumulated_errors = 0.0 # for rms
        inputs_set.zip(outputs_set).each do |t_input, t_output|
          forward_outputs = get_forward_outputs(t_input)
          # for rms start
          total_error_of_output_nodes = forward_outputs.zip(t_output).reduce(0.0) do |a, output_pair|
            a + ((output_pair[0] - output_pair[1])**2 / 2.0)
          end
          # end
          accumulated_errors += total_error_of_output_nodes / forward_outputs.size
          # accumulated_errors += forward_outputs.zip(t_output).reduce(0.0) { |a, output_pair| a + ((output_pair[0] - output_pair[1])**2 / 2.0) } / forward_outputs.size
          # for rms end
          # Error derivative at the outputs drives backprop; weights are
          # updated after every sample (stochastic, not batch).
          backward_inputs = forward_outputs.zip(t_output).map { |o, t| o - t }
          run_backpropagate(backward_inputs)
          update_weights
        end

        rms_error = Math.sqrt(2.0 * accumulated_errors / inputs_set.size) # for rms
        # rms_error = calculate_rms_error(inputs_set, outputs_set)
        puts "--> #{rms_error} (#{i_training}/#{max_training_count})"

        # Keep a snapshot of the best weights seen across all epochs.
        if rms_error < best_error
          puts "update best!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
          best_error = rms_error
          best_weights_array = @weights_set.get_weights_as_array
        end
        puts "best: #{best_error}"


        break if rms_error <= tolerance
      end
      if monitoring_channels.include? :best_params_training
        File.open "best_weights_#{Time.now.to_i}.yml", 'w+' do |f|
          YAML.dump(best_weights_array, f)
        end
      end
    end


    # Variant of #learn without best-weight tracking or YAML dumping.
    def learn2(inputs_set, outputs_set, max_training_count=50, tolerance=0.0, monitoring_channels=[])
      # looks like works well for networks which has many layers... [1, 10, 10, 10, 1], [1, 100, 100, 100, 1]
      # looks like NOT works well for networks which has many units in a layer... [1, 100, 1]
      raise RubyBrain::Exception::TrainingDataError if inputs_set.size != outputs_set.size
      # raise "inputs_set and outputs_set has different size!!!!" if inputs_set.size != outputs_set.size
      initial_learning_rate = @learning_rate

      rms_error = Float::INFINITY
      max_training_count.times do |i_training|
        accumulated_errors = 0.0 # for rms
        inputs_set.zip(outputs_set).each do |t_input, t_output|
          forward_outputs = get_forward_outputs(t_input)
          # for rms start
          total_error_of_output_nodes = forward_outputs.zip(t_output).reduce(0.0) do |a, output_pair|
            a + ((output_pair[0] - output_pair[1])**2 / 2.0)
          end
          # end
          error_of_this_training_data = total_error_of_output_nodes / forward_outputs.size
          accumulated_errors += error_of_this_training_data
          # accumulated_errors += forward_outputs.zip(t_output).reduce(0.0) { |a, output_pair| a + ((output_pair[0] - output_pair[1])**2 / 2.0) } / forward_outputs.size
          # for rms end
          # if error_of_this_training_data > rms_error**2/2.0
          #   @learning_rate *= 10.0
          # end
          backward_inputs = forward_outputs.zip(t_output).map { |o, t| o - t }
          run_backpropagate(backward_inputs)
          update_weights
          # @learning_rate = initial_learning_rate
        end

        rms_error = Math.sqrt(2.0 * accumulated_errors / inputs_set.size) # for rms

        # rms_error = calculate_rms_error(inputs_set, outputs_set)
        puts "--> #{rms_error} (#{i_training}/#{max_training_count})"
        break if rms_error <= tolerance
      end
    end


    # Trains only the weight set at +layer_index+, temporarily boosting
    # the learning rate (x10) for samples whose error exceeds the previous
    # epoch's mean squared error, then restoring it after each sample.
    def learn_only_specified_layer(layer_index, inputs_set, outputs_set, max_training_count=50, tolerance=0.0)
      # looks like works well for networks which has many layers... [1, 10, 10, 10, 1], [1, 100, 100, 100, 1]
      # looks like NOT works well for networks which has many units in a layer... [1, 100, 1]
      raise "inputs_set and outputs_set has different size!!!!" if inputs_set.size != outputs_set.size
      initial_learning_rate = @learning_rate

      rms_error = Float::INFINITY
      max_training_count.times do |i_training|
        accumulated_errors = 0.0 # for rms
        inputs_set.zip(outputs_set).each do |t_input, t_output|
          forward_outputs = get_forward_outputs(t_input)
          # for rms start
          total_error_of_output_nodes = forward_outputs.zip(t_output).reduce(0.0) do |a, output_pair|
            a + ((output_pair[0] - output_pair[1])**2 / 2.0)
          end
          # end
          error_of_this_training_data = total_error_of_output_nodes / forward_outputs.size
          accumulated_errors += error_of_this_training_data
          # accumulated_errors += forward_outputs.zip(t_output).reduce(0.0) { |a, output_pair| a + ((output_pair[0] - output_pair[1])**2 / 2.0) } / forward_outputs.size
          # for rms end
          if error_of_this_training_data > rms_error**2/2.0
            @learning_rate *= 10.0
          end
          backward_inputs = forward_outputs.zip(t_output).map { |o, t| o - t }
          run_backpropagate(backward_inputs)
          update_weights_of_layer(layer_index)
          @learning_rate = initial_learning_rate
        end

        rms_error = Math.sqrt(2.0 * accumulated_errors / inputs_set.size) # for rms

        # rms_error = calculate_rms_error(inputs_set, outputs_set)
        puts "--> #{rms_error} (#{i_training}/#{max_training_count})"
        break if rms_error <= tolerance
      end
    end


    # Pretty-prints every weight matrix to stdout (debugging aid).
    def dump_weights
      @weights_set.each_weights do |weights|
        pp weights
      end
    end

    # Serializes the current weights to YAML via the weight container.
    def dump_weights_to_yaml(file_name=nil)
      @weights_set.dump_to_yaml(file_name)
    end

    # Restores weights previously saved with #dump_weights_to_yaml.
    def load_weights_from_yaml_file(yaml_file)
      @weights_set.load_from_yaml_file(yaml_file)
    end

  end
end