ruby_brain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+ # Example script: evaluate a pretrained RubyBrain network on MNIST.
+ # Builds a [784, 50, 10] network and loads weights from a YAML snapshot
+ # shipped next to this example (no training is run here by default).
+ require 'ruby_brain'
2
+ require 'ruby_brain/dataset/mnist/data'
3
+
4
+ NUM_TRAIN_DATA = 5000
5
+ NUM_TEST_DATA = 500
6
+
7
+ # Downloads the MNIST archives on first use; returns {input:, output:}.
+ dataset = RubyBrain::DataSet::Mnist::data
8
+
9
+ # First NUM_TRAIN_DATA samples are the training split (unused unless
+ # network.learn below is uncommented).
+ training_input = dataset[:input][0..(NUM_TRAIN_DATA-1)]
10
+ training_supervisor = dataset[:output][0..(NUM_TRAIN_DATA-1)]
11
+
12
+ # test_input = dataset[:input][NUM_TRAIN_DATA..(NUM_TRAIN_DATA+NUM_TEST_DATA-1)]
13
+ # test_supervisor = dataset[:output][NUM_TRAIN_DATA..(NUM_TRAIN_DATA+NUM_TEST_DATA-1)]
+ # Everything after the training split is used for evaluation (note:
+ # NUM_TEST_DATA is not applied here; the whole remainder is taken).
14
+ test_input = dataset[:input][NUM_TRAIN_DATA..-1]
15
+ test_supervisor = dataset[:output][NUM_TRAIN_DATA..-1]
16
+
17
+ # Layer sizes are derived from the data: 784 inputs, 50 hidden, 10 outputs.
+ network = RubyBrain::Network.new([dataset[:input].first.size, 50, dataset[:output].first.size])
18
+ # network.learning_rate = 0.7
19
+ network.init_network
20
+ # Pretrained weights relative to this file's directory.
+ network.load_weights_from_yaml_file(File.dirname(__FILE__) + '/../best_weights_1469044985.yml')
21
+
22
+ ### You can initialize weights by loading weights from a file if you want.
23
+ # network.load_weights_from_yaml_file('path/to/weights.yml.file')
24
+
25
+ # network.learn(training_input, training_supervisor, max_training_count=100, tolerance=0.0004, monitoring_channels=[:best_params_training])
26
+
27
+ ### You can save weights into a yml file if you want.
28
+ # network.dump_weights_to_yaml('path/to/weights.yml.file')
29
+
30
+
31
class Array
  # Returns the index of the first maximum element.
  # For an empty array this returns 0, matching the original behavior.
  def argmax
    best_index = 0
    each_with_index do |value, index|
      # Strict > keeps the earliest index on ties.
      best_index = index if value > self[best_index]
    end
    best_index
  end
end
44
+
45
+ # Evaluate the network on every test sample, printing each digit as ASCII
+ # art plus its true and predicted labels, then report overall accuracy.
+ results = []
46
+ test_input.each_with_index do |input, i|
47
+ ### You can see test input, label and predicted label in standard out if you uncomment in this block
48
+
49
+ # Render the 28x28 image; pixels are floats in 0.0..1.0.
+ input.each_with_index do |e, j|
50
+ print(e > 0.3 ? 'x' : ' ')
51
+ # NOTE(review): breaks the line when j is a multiple of 28, i.e. after the
+ # first pixel and mid-row thereafter; (j + 1) % 28 == 0 would align rows.
+ puts if (j % 28) == 0
52
+ end
53
+ puts
54
+ # argmax of the one-hot supervisor vector is the true digit label.
+ supervisor_label = test_supervisor[i].argmax
55
+ # argmax of the forward-pass outputs is the predicted digit.
+ predicated_label = network.get_forward_outputs(test_input[i]).argmax
56
+ puts "test_supervisor: #{supervisor_label}"
57
+ puts "predicate: #{predicated_label}"
58
+ results << (supervisor_label == predicated_label)
59
+ puts "------------------------------------------------------------"
60
+ end
61
+
62
+ # Accuracy = fraction of test samples whose prediction matched the label.
+ puts "accuracy: #{results.count(true).to_f/results.size}"
63
+
64
+
65
+
66
+ ### you can do above procedure simply by using Trainer
67
+
68
+ # training_option = {
69
+ # learning_rate: 0.5,
70
+ # max_training_count: 50,
71
+ # tolerance: 0.0004,
72
+ # # initial_weights_file: 'weights_3_30_10_1429166740.yml',
73
+ # # initial_weights_file: 'best_weights_1429544001.yml',
74
+ # monitoring_channels: [:best_params_training]
75
+ # }
76
+
77
+ # RubyBrain::Trainer.normal_learning([dataset[:input].first.size, 50, dataset[:output].first.size],
78
+ # training_input, training_supervisor,
79
+ # training_option)
80
+
81
+
82
+
data/lib/ruby_brain.rb ADDED
@@ -0,0 +1,19 @@
1
+ # Gem entry point: pulls in all RubyBrain components and pre-declares the
+ # sub-namespaces that the component files reopen.
+ require "ruby_brain/version"
2
+ require 'forwardable'
3
+ require 'yaml'
4
+ require 'pp'
5
+
6
+ module RubyBrain
7
+ require "ruby_brain/nodes"
8
+ require "ruby_brain/layer"
9
+ require "ruby_brain/weights"
10
+ require "ruby_brain/network"
11
+ require "ruby_brain/trainer"
12
+ require 'ruby_brain/exception'
13
+ require 'ruby_brain/training_data_manipulator'
14
+
15
+ # Empty declarations so the namespaces exist even before the files above
+ # reopen them.
+ module Nodes end
16
+ module Trainer end
17
+ module Exception end
18
+ module DataSet end
19
+ end
@@ -0,0 +1,62 @@
1
module RubyBrain::DataSet::Mnist

  require 'mnist'
  require 'open-uri'

  # Downloads the resource at +target_url+ and writes it to +dest_path+
  # in binary mode.
  #
  # @param target_url [String] URL to fetch
  # @param dest_path [String] local file path to write
  def download_file(target_url, dest_path)
    File.open(dest_path, "wb") do |saved_file|
      # FIX: bare Kernel#open on a URL relied on the open-uri monkey patch,
      # which was deprecated in Ruby 2.6 and removed in Ruby 3.0.
      # URI.open is the supported, explicit equivalent.
      URI.open(target_url, "rb") do |read_file|
        saved_file.write(read_file.read)
      end
    end
  end

  # Loads the MNIST training set, downloading the raw IDX archives into the
  # current working directory on first use.
  #
  # @return [Hash] {input: [[Float 0.0..1.0] * 784, ...],
  #                 output: [[0/1 one-hot of length 10], ...]}
  def data
    train_images_path = Dir.pwd + '/train-images-idx3-ubyte.gz'
    train_labels_path = Dir.pwd + '/train-labels-idx1-ubyte.gz'

    unless File.exist?(train_images_path)
      puts 'downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz ...'
      download_file('http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz', train_images_path)
    end

    unless File.exist?(train_labels_path)
      puts 'downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz'
      download_file('http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz', train_labels_path)
    end

    train_images = Mnist.load_images(train_images_path)
    train_labels = Mnist.load_labels(train_labels_path)

    # Index 2 appears to hold the packed per-image pixel strings
    # (presumably load_images returns [magic, count, images] -- TODO confirm
    # against the mnist gem API). Pixels are scaled from 0..255 to 0.0..1.0.
    input_training_set = train_images[2].map do |image|
      image.unpack('C*').map { |e| e / 255.0 }
    end

    # Each label becomes a 10-element one-hot vector.
    output_training_set = train_labels.map do |label|
      one_hot_vector = Array.new(10, 0)
      one_hot_vector[label] = 1
      one_hot_vector
    end

    {input: input_training_set, output: output_training_set}
  end

  module_function :data, :download_file
end
@@ -0,0 +1,28 @@
1
+ # Scratch script: inspect the structure of the mnist gem's return values and
+ # print the first ten training digits as ASCII art with their labels.
+ # Assumes the gzipped IDX files already exist in the working directory.
+ require 'mnist'
2
+
3
+
4
+ train_images = Mnist.load_images('./train-images-idx3-ubyte.gz')
5
+ train_labels = Mnist.load_labels('./train-labels-idx1-ubyte.gz')
6
+
7
+ # Probe the tuple returned by load_images (classes/sizes of each slot).
+ puts train_images[0].class
8
+ puts train_images[1].class
9
+ puts train_images[2].size
10
+ puts train_images[2][0].size
11
+ puts train_images[2][59999][783].class
12
+ puts train_images[2][59999].class
13
+ puts "------------------------------"
14
+
15
+ # Render the first 10 images; raw pixels are bytes 0..255.
+ 10.times do |j|
16
+ train_images[2][j].unpack('C*').each_with_index do |e, i|
17
+ print(e > 50 ? 'x' : ' ')
18
+ # NOTE(review): newline lands after pixel 0 and then mid-row;
+ # (i + 1) % 28 == 0 would align the 28-pixel rows.
+ puts if (i % 28) == 0
19
+ end
20
+ puts
21
+ puts train_labels[j]
22
+ end
23
+
24
+
25
+
26
+
27
+
28
+
@@ -0,0 +1,17 @@
1
+
2
+
3
module RubyBrain
  # Namespace for every error the gem raises.
  module Exception
    # Root of the gem's error hierarchy; rescue this to catch all
    # RubyBrain-specific failures.
    class RubyBrainError < StandardError; end

    # Raised when data does not match the dimensions the network expects.
    class DataDimensionError < RubyBrainError; end

    # Raised when a training input/output pair of sets is malformed
    # (e.g. mismatched sizes).
    class TrainingDataError < DataDimensionError; end
  end
end
@@ -0,0 +1,37 @@
1
module RubyBrain
  # One layer of the network: an ordered list of nodes plus the weight sets
  # sitting on its input (left) and output (right) sides.
  class Layer
    attr_accessor :input_weights, :output_weights
    attr_reader :next_node_order_index, :nodes

    def initialize
      @nodes = []
      @next_node_order_index = 0
    end

    # Adds +node+ to the layer: stamps it with its position in the layer and
    # wires it to the layer's current input/output weight sets.
    # Note: nodes appended before input_weights/output_weights are assigned
    # keep whatever value was current at append time.
    def append(node)
      node.order_index = @next_node_order_index
      node.left_side_weights = @input_weights
      node.right_side_weights = @output_weights
      @nodes << node
      @next_node_order_index += 1
    end

    # Number of nodes currently in the layer (including any bias/const node).
    def num_nodes
      @nodes.size
    end

    # Yields each node in append order.
    def each_node(&block)
      @nodes.each(&block)
    end

    # Forward-pass outputs of every node for the given inputs.
    def forward_outputs(inputs = [])
      @nodes.map { |node| node.output_of_forward_calc(inputs) }
    end

    # Backward-pass outputs of every node; nil results (e.g. from const
    # nodes) are dropped.
    def backward_outputs(inputs)
      @nodes.map { |node| node.output_of_backward_calc(inputs) }.compact
    end
  end
end
@@ -0,0 +1,252 @@
1
+ module RubyBrain
2
+ # A feed-forward neural network trained by backpropagation. Layer sizes are
+ # given as num_units_list, e.g. [784, 50, 10]; weights live in a
+ # WeightContainer and each layer (except the last) gets an extra ConstNode
+ # acting as the bias unit.
+ class Network
3
+ extend Forwardable
4
+ # Weight inspection/overwrite is delegated straight to the weight container.
+ def_delegators :@weights_set, :overwrite_weights, :get_weights_as_array
5
+
6
+ attr_accessor :learning_rate
7
+
8
+ # num_units_list: Array of layer widths, input layer first, output last.
+ def initialize(num_units_list)
9
+ @layers = []
10
+ @num_units_list = num_units_list
11
+ @weights_set = WeightContainer.new(@num_units_list)
12
+ end
13
+
14
+ # Loads weights from +weights_set_source+ and rebuilds the layer graph so
+ # nodes are wired to the freshly loaded weights.
+ def load_weights_from(weights_set_source)
15
+ @weights_set.load_from(weights_set_source)
16
+ init_network
17
+ end
18
+
19
+ # def overwrite_weights(weights_set_source)
20
+ # @weights_set.overwrite_weights(weights_set_source)
21
+ # end
22
+
23
+ # Builds the layer/node graph from @num_units_list: an input layer of
+ # ConstNodes (+1 bias), hidden layers of Neurons each with a trailing
+ # ConstNode bias, and an output layer of Neurons with no bias node.
+ def init_network
24
+ @layers = []
25
+ layer = Layer.new
26
+ (@num_units_list[0] + 1).times do
27
+ layer.append Nodes::ConstNode.new
28
+ # NOTE(review): output_weights is assigned inside the loop, after the
+ # first append — so the first ConstNode is appended while
+ # output_weights is still nil. Verify ConstNode never reads
+ # right_side_weights, or hoist this assignment above the loop.
+ layer.output_weights = @weights_set.weights_of_order(0)
29
+ end
30
+ @layers << layer
31
+
32
+ # Hidden layers: weights_of_order(i) on the left, (i+1) on the right.
+ @num_units_list[1..-2].each_with_index do |num_units, i|
33
+ layer = Layer.new
34
+ layer.input_weights = @weights_set.weights_of_order(i)
35
+ layer.output_weights = @weights_set.weights_of_order(i+1)
36
+ (num_units).times do
37
+ layer.append Nodes::Neuron.new
38
+ end
39
+ layer.append Nodes::ConstNode.new
40
+ @layers << layer
41
+ end
42
+
43
+ # Output layer: input weights only, no bias node.
+ layer = Layer.new
44
+ layer.input_weights = @weights_set.weights_of_order(@num_units_list.size - 2)
45
+ @num_units_list[-1].times do
46
+ layer.append Nodes::Neuron.new
47
+ end
48
+ @layers << layer
49
+ end
50
+
51
+ # def get_weights_as_array
52
+ # @weights_set.get_weights_as_array
53
+ # end
54
+
55
+
56
+ # Runs a forward pass: seeds the input layer's node values, then feeds
+ # each layer's outputs into the next. Returns the output layer's outputs.
+ def get_forward_outputs(inputs)
57
+ inputs.each_with_index do |input, i|
58
+ @layers.first.nodes[i].value = input
59
+ end
60
+
61
+ a_layer_outputs = nil
62
+ a_layer_inputs = @layers.first.forward_outputs
63
+ @layers.each do |layer|
64
+ a_layer_outputs = layer.forward_outputs(a_layer_inputs)
65
+ a_layer_inputs = a_layer_outputs
66
+ end
67
+ a_layer_outputs
68
+ end
69
+
70
+ # Propagates error terms backwards through every layer except the input
+ # layer (reverse[0..-2] skips it). Nodes cache their backward outputs for
+ # the subsequent weight update.
+ def run_backpropagate(backward_inputs)
71
+ a_layer_outputs = nil
72
+ a_layer_inputs = backward_inputs
73
+ @layers.reverse[0..-2].each do |layer|
74
+ a_layer_outputs = layer.backward_outputs(a_layer_inputs)
75
+ a_layer_inputs = a_layer_outputs
76
+ end
77
+ a_layer_outputs
78
+ end
79
+
80
+ # Gradient-descent step over every weight: w -= lr * upstream node output
+ # * downstream node backward output. Relies on the node state cached by
+ # the most recent get_forward_outputs / run_backpropagate pair.
+ def update_weights
81
+ @weights_set.each_weights_with_index do |weights, i|
82
+ weights.each_with_index do |wl, j|
83
+ wl.each_with_index do |w, k|
84
+ wl[k] = w - (@learning_rate * @layers[i].nodes[j].this_output * @layers[i+1].nodes[k].this_backward_output)
85
+ end
86
+ end
87
+ end
88
+ end
89
+
90
+ # Same step as update_weights but restricted to one weight set; negative
+ # layer_index counts from the end (Ruby-style).
+ def update_weights_of_layer(layer_index)
91
+ layer_index = @weights_set.num_sets + layer_index if layer_index < 0
92
+ @weights_set.each_weights_with_index do |weights, i|
93
+ next if i != layer_index
94
+ weights.each_with_index do |wl, j|
95
+ wl.each_with_index do |w, k|
96
+ wl[k] = w - (@learning_rate * @layers[i].nodes[j].this_output * @layers[i+1].nodes[k].this_backward_output)
97
+ end
98
+ end
99
+ end
100
+ end
101
+
102
+ # def calculate_rms_error(training_inputs_set, training_outputs_set)
103
+ # accumulated_errors = 0.0
104
+ # training_inputs_set.zip(training_outputs_set).each do |t_input, t_output|
105
+ # forward_outputs = get_forward_outputs(t_input)
106
+ # total_error_of_output_nodes = 0.0
107
+ # forward_outputs.zip(t_output).each do |o, t|
108
+ # total_error_of_output_nodes += (o - t)**2 / 2.0
109
+ # end
110
+ # accumulated_errors += total_error_of_output_nodes / forward_outputs.size
111
+ # end
112
+ # Math.sqrt(2.0 * accumulated_errors / training_inputs_set.size)
113
+ # end
114
+
115
+ # Online (per-sample) backpropagation training. Tracks the best RMS error
+ # seen and, when monitoring_channels includes :best_params_training, dumps
+ # the best weights to a timestamped YAML file in the working directory.
+ # Raises TrainingDataError when input and output set sizes differ.
+ def learn(inputs_set, outputs_set, max_training_count=50, tolerance=0.0, monitoring_channels=[])
116
+ raise RubyBrain::Exception::TrainingDataError if inputs_set.size != outputs_set.size
117
+ # raise "inputs_set and outputs_set has different size!!!!" if inputs_set.size != outputs_set.size
118
+
119
+ # Sentinel "worst possible" error; any real RMS will be smaller.
+ best_error = 9999999999999
120
+ best_weights_array = []
121
+ max_training_count.times do |i_training|
122
+ accumulated_errors = 0.0 # for rms
123
+ inputs_set.zip(outputs_set).each do |t_input, t_output|
124
+ forward_outputs = get_forward_outputs(t_input)
125
+ # for rms start
126
+ total_error_of_output_nodes = forward_outputs.zip(t_output).reduce(0.0) do |a, output_pair|
127
+ a + ((output_pair[0] - output_pair[1])**2 / 2.0)
128
+ end
129
+ # end
130
+ accumulated_errors += total_error_of_output_nodes / forward_outputs.size
131
+ # accumulated_errors += forward_outputs.zip(t_output).reduce(0.0) { |a, output_pair| a + ((output_pair[0] - output_pair[1])**2 / 2.0) } / forward_outputs.size
132
+ # for rms end
133
+ # Error signal per output node; weights updated after every sample.
+ backward_inputs = forward_outputs.zip(t_output).map { |o, t| o - t }
134
+ run_backpropagate(backward_inputs)
135
+ update_weights
136
+ end
137
+
138
+ rms_error = Math.sqrt(2.0 * accumulated_errors / inputs_set.size) # for rms
139
+ # rms_error = calculate_rms_error(inputs_set, outputs_set)
140
+ puts "--> #{rms_error} (#{i_training}/#{max_training_count})"
141
+
142
+ if rms_error < best_error
143
+ puts "update best!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
144
+ best_error = rms_error
145
+ best_weights_array = @weights_set.get_weights_as_array
146
+ end
147
+ puts "best: #{best_error}"
148
+
149
+
150
+ break if rms_error <= tolerance
151
+ end
152
+ if monitoring_channels.include? :best_params_training
153
+ File.open "best_weights_#{Time.now.to_i}.yml", 'w+' do |f|
154
+ YAML.dump(best_weights_array, f)
155
+ end
156
+ end
157
+ end
158
+
159
+
160
+ # Experimental variant of #learn without best-weight tracking or dumping.
+ # Kept for comparison; see the author's notes on which topologies it suits.
+ def learn2(inputs_set, outputs_set, max_training_count=50, tolerance=0.0, monitoring_channels=[])
161
+ # looks like works well for networks which has many layers... [1, 10, 10, 10, 1], [1, 100, 100, 100, 1]
162
+ # looks like NOT works well for networks which has many units in a layer... [1, 100, 1]
163
+ raise RubyBrain::Exception::TrainingDataError if inputs_set.size != outputs_set.size
164
+ # raise "inputs_set and outputs_set has different size!!!!" if inputs_set.size != outputs_set.size
165
+ initial_learning_rate = @learning_rate
166
+
167
+ rms_error = Float::INFINITY
168
+ max_training_count.times do |i_training|
169
+ accumulated_errors = 0.0 # for rms
170
+ inputs_set.zip(outputs_set).each do |t_input, t_output|
171
+ forward_outputs = get_forward_outputs(t_input)
172
+ # for rms start
173
+ total_error_of_output_nodes = forward_outputs.zip(t_output).reduce(0.0) do |a, output_pair|
174
+ a + ((output_pair[0] - output_pair[1])**2 / 2.0)
175
+ end
176
+ # end
177
+ error_of_this_training_data = total_error_of_output_nodes / forward_outputs.size
178
+ accumulated_errors += error_of_this_training_data
179
+ # accumulated_errors += forward_outputs.zip(t_output).reduce(0.0) { |a, output_pair| a + ((output_pair[0] - output_pair[1])**2 / 2.0) } / forward_outputs.size
180
+ # for rms end
181
+ # if error_of_this_training_data > rms_error**2/2.0
182
+ # @learning_rate *= 10.0
183
+ # end
184
+ backward_inputs = forward_outputs.zip(t_output).map { |o, t| o - t }
185
+ run_backpropagate(backward_inputs)
186
+ update_weights
187
+ # @learning_rate = initial_learning_rate
188
+ end
189
+
190
+ rms_error = Math.sqrt(2.0 * accumulated_errors / inputs_set.size) # for rms
191
+
192
+ # rms_error = calculate_rms_error(inputs_set, outputs_set)
193
+ puts "--> #{rms_error} (#{i_training}/#{max_training_count})"
194
+ break if rms_error <= tolerance
195
+ end
196
+ end
197
+
198
+
199
+ # Trains only the weight set at +layer_index+ (others frozen). Temporarily
+ # escalates the learning rate x10 for samples whose error exceeds the
+ # previous epoch's mean-square error, restoring it after each sample.
+ # NOTE(review): raises a plain RuntimeError on size mismatch, unlike
+ # #learn/#learn2 which raise TrainingDataError — possibly unintended.
+ def learn_only_specified_layer(layer_index, inputs_set, outputs_set, max_training_count=50, tolerance=0.0)
200
+ # looks like works well for networks which has many layers... [1, 10, 10, 10, 1], [1, 100, 100, 100, 1]
201
+ # looks like NOT works well for networks which has many units in a layer... [1, 100, 1]
202
+ raise "inputs_set and outputs_set has different size!!!!" if inputs_set.size != outputs_set.size
203
+ initial_learning_rate = @learning_rate
204
+
205
+ rms_error = Float::INFINITY
206
+ max_training_count.times do |i_training|
207
+ accumulated_errors = 0.0 # for rms
208
+ inputs_set.zip(outputs_set).each do |t_input, t_output|
209
+ forward_outputs = get_forward_outputs(t_input)
210
+ # for rms start
211
+ total_error_of_output_nodes = forward_outputs.zip(t_output).reduce(0.0) do |a, output_pair|
212
+ a + ((output_pair[0] - output_pair[1])**2 / 2.0)
213
+ end
214
+ # end
215
+ error_of_this_training_data = total_error_of_output_nodes / forward_outputs.size
216
+ accumulated_errors += error_of_this_training_data
217
+ # accumulated_errors += forward_outputs.zip(t_output).reduce(0.0) { |a, output_pair| a + ((output_pair[0] - output_pair[1])**2 / 2.0) } / forward_outputs.size
218
+ # for rms end
219
+ if error_of_this_training_data > rms_error**2/2.0
220
+ @learning_rate *= 10.0
221
+ end
222
+ backward_inputs = forward_outputs.zip(t_output).map { |o, t| o - t }
223
+ run_backpropagate(backward_inputs)
224
+ update_weights_of_layer(layer_index)
225
+ @learning_rate = initial_learning_rate
226
+ end
227
+
228
+ rms_error = Math.sqrt(2.0 * accumulated_errors / inputs_set.size) # for rms
229
+
230
+ # rms_error = calculate_rms_error(inputs_set, outputs_set)
231
+ puts "--> #{rms_error} (#{i_training}/#{max_training_count})"
232
+ break if rms_error <= tolerance
233
+ end
234
+ end
235
+
236
+
237
+ # Pretty-prints every weight set to stdout (debugging aid).
+ def dump_weights
238
+ @weights_set.each_weights do |weights|
239
+ pp weights
240
+ end
241
+ end
242
+
243
+ # Serializes the current weights to YAML; file_name semantics are defined
+ # by WeightContainer#dump_to_yaml.
+ def dump_weights_to_yaml(file_name=nil)
244
+ @weights_set.dump_to_yaml(file_name)
245
+ end
246
+
247
+ # Loads weights from a YAML file. Unlike #load_weights_from this does NOT
+ # call init_network — callers must rebuild the graph themselves.
+ def load_weights_from_yaml_file(yaml_file)
248
+ @weights_set.load_from_yaml_file(yaml_file)
249
+ end
250
+
251
+ end
252
+ end