ai4ruby 1.11

Files changed (79)
  1. data/README.rdoc +47 -0
  2. data/examples/classifiers/id3_data.csv +121 -0
  3. data/examples/classifiers/id3_example.rb +29 -0
  4. data/examples/classifiers/naive_bayes_data.csv +11 -0
  5. data/examples/classifiers/naive_bayes_example.rb +16 -0
  6. data/examples/classifiers/results.txt +31 -0
  7. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  8. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  9. data/examples/neural_network/backpropagation_example.rb +67 -0
  10. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  11. data/examples/neural_network/patterns_with_noise.rb +66 -0
  12. data/examples/neural_network/training_patterns.rb +68 -0
  13. data/examples/neural_network/xor_example.rb +35 -0
  14. data/examples/som/som_data.rb +156 -0
  15. data/examples/som/som_multi_node_example.rb +22 -0
  16. data/examples/som/som_single_example.rb +24 -0
  17. data/lib/ai4r.rb +33 -0
  18. data/lib/ai4r/classifiers/classifier.rb +62 -0
  19. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  20. data/lib/ai4r/classifiers/ib1.rb +121 -0
  21. data/lib/ai4r/classifiers/id3.rb +326 -0
  22. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  23. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  24. data/lib/ai4r/classifiers/one_r.rb +110 -0
  25. data/lib/ai4r/classifiers/prism.rb +197 -0
  26. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  27. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  28. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  29. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  30. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  31. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  32. data/lib/ai4r/clusterers/diana.rb +139 -0
  33. data/lib/ai4r/clusterers/k_means.rb +126 -0
  34. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  35. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  36. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  37. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +31 -0
  38. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  39. data/lib/ai4r/data/data_set.rb +266 -0
  40. data/lib/ai4r/data/parameterizable.rb +64 -0
  41. data/lib/ai4r/data/proximity.rb +100 -0
  42. data/lib/ai4r/data/statistics.rb +77 -0
  43. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  44. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  45. data/lib/ai4r/neural_network/backpropagation.rb +326 -0
  46. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  47. data/lib/ai4r/som/layer.rb +68 -0
  48. data/lib/ai4r/som/node.rb +96 -0
  49. data/lib/ai4r/som/som.rb +155 -0
  50. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  51. data/test/classifiers/hyperpipes_test.rb +84 -0
  52. data/test/classifiers/ib1_test.rb +78 -0
  53. data/test/classifiers/id3_test.rb +208 -0
  54. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  55. data/test/classifiers/naive_bayes_test.rb +43 -0
  56. data/test/classifiers/one_r_test.rb +62 -0
  57. data/test/classifiers/prism_test.rb +85 -0
  58. data/test/classifiers/zero_r_test.rb +49 -0
  59. data/test/clusterers/average_linkage_test.rb +51 -0
  60. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  61. data/test/clusterers/centroid_linkage_test.rb +53 -0
  62. data/test/clusterers/complete_linkage_test.rb +57 -0
  63. data/test/clusterers/diana_test.rb +69 -0
  64. data/test/clusterers/k_means_test.rb +100 -0
  65. data/test/clusterers/median_linkage_test.rb +53 -0
  66. data/test/clusterers/single_linkage_test.rb +122 -0
  67. data/test/clusterers/ward_linkage_hierarchical_test.rb +61 -0
  68. data/test/clusterers/ward_linkage_test.rb +53 -0
  69. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  70. data/test/data/data_set_test.rb +96 -0
  71. data/test/data/proximity_test.rb +81 -0
  72. data/test/data/statistics_test.rb +65 -0
  73. data/test/experiment/classifier_evaluator_test.rb +76 -0
  74. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  75. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  76. data/test/neural_network/backpropagation_test.rb +82 -0
  77. data/test/neural_network/hopfield_test.rb +72 -0
  78. data/test/som/som_test.rb +97 -0
  79. metadata +168 -0
data/lib/ai4r/neural_network/backpropagation.rb
@@ -0,0 +1,326 @@
+ # Author::    Sergio Fierens
+ # License::   MPL 1.1
+ # Project::   ai4r
+ # Url::       http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ require File.dirname(__FILE__) + '/../data/parameterizable'
+
+ module Ai4r
+
+   # Artificial Neural Networks are mathematical or computational models based on
+   # biological neural networks.
+   #
+   # More about neural networks:
+   #
+   # * http://en.wikipedia.org/wiki/Artificial_neural_network
+   #
+   module NeuralNetwork
+
+     # = Introduction
+     #
+     # This is an implementation of a multilayer perceptron network, using
+     # the backpropagation algorithm for learning.
+     #
+     # Backpropagation is a supervised learning technique (described
+     # by Paul Werbos in 1974, and further developed by David E.
+     # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986).
+     #
+     # = Features
+     #
+     # * Support for any network architecture (number of layers and neurons)
+     # * Configurable propagation function
+     # * Optional usage of bias
+     # * Configurable momentum
+     # * Configurable learning rate
+     # * Configurable initial weight function
+     # * 100% ruby code, no external dependency
+     #
+     # = Parameters
+     #
+     # Use the class method get_parameters_info to obtain details on the
+     # algorithm parameters. Use set_parameters to set values for these
+     # parameters.
+     #
+     # * :disable_bias => If true, the algorithm will not use bias nodes.
+     #   False by default.
+     # * :initial_weight_function => f(n, i, j) must return the initial
+     #   weight for the connection between node i in layer n, and node j in
+     #   layer n+1. By default a random number in the [-1, 1) range.
+     # * :propagation_function => By default:
+     #   lambda { |x| 1/(1+Math.exp(-1*(x))) }
+     # * :derivative_propagation_function => Derivative of the propagation
+     #   function, based on the propagation function output.
+     #   By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
+     # * :learning_rate => By default 0.25
+     # * :momentum => By default 0.1. Set this parameter to 0 to disable
+     #   momentum.
+     #
+     # = How to use it
+     #
+     #   # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
+     #   # and 2 outputs
+     #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+     #
+     #   # Train the network
+     #   1000.times do |i|
+     #     net.train(example[i], result[i])
+     #   end
+     #
+     #   # Use it: Evaluate data with the trained network
+     #   net.eval([12, 48, 12, 25])
+     #     => [0.86, 0.01]
+     #
+     # More about multilayer perceptron neural networks and backpropagation:
+     #
+     # * http://en.wikipedia.org/wiki/Backpropagation
+     # * http://en.wikipedia.org/wiki/Multilayer_perceptron
+     #
+     # = About the project
+     # Author::    Sergio Fierens
+     # License::   MPL 1.1
+     # Url::       http://ai4r.rubyforge.org
+     class Backpropagation
+
+       include Ai4r::Data::Parameterizable
+
+       parameters_info :disable_bias => "If true, the algorithm will not use "+
+             "bias nodes. False by default.",
+           :initial_weight_function => "f(n, i, j) must return the initial "+
+             "weight for the connection between node i in layer n, and "+
+             "node j in layer n+1. By default a random number in the [-1, 1) range.",
+           :propagation_function => "By default: " +
+             "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
+           :derivative_propagation_function => "Derivative of the propagation "+
+             "function, based on the propagation function output. By default: " +
+             "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
+           :learning_rate => "By default 0.25",
+           :momentum => "By default 0.1. Set this parameter to 0 to disable "+
+             "momentum."
+
+       attr_accessor :structure, :weights, :activation_nodes, :last_changes
+
+       # Creates a new network, specifying its architecture.
+       # E.g.
+       #
+       #   net = Backpropagation.new([4, 3, 2])  # 4 inputs
+       #                                         # 1 hidden layer with 3 neurons,
+       #                                         # 2 outputs
+       #   net = Backpropagation.new([2, 3, 3, 4])  # 2 inputs
+       #                                            # 2 hidden layers with 3 neurons each,
+       #                                            # 4 outputs
+       #   net = Backpropagation.new([2, 1])  # 2 inputs
+       #                                      # No hidden layer
+       #                                      # 1 output
+       def initialize(network_structure)
+         @structure = network_structure
+         @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1 }
+         @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } # lambda { |x| Math.tanh(x) }
+         @derivative_propagation_function = lambda { |y| y*(1-y) } # lambda { |y| 1.0 - y**2 }
+         @disable_bias = false
+         @learning_rate = 0.25
+         @momentum = 0.1
+       end
+
+       # Evaluates the input.
+       # E.g.
+       #   net = Backpropagation.new([4, 3, 2])
+       #   net.eval([25, 32.3, 12.8, 1.5])
+       #     # =>  [0.83, 0.03]
+       def eval(input_values)
+         check_input_dimension(input_values.length)
+         init_network if !@weights
+         feedforward(input_values)
+         return @activation_nodes.last.clone
+       end
+
+       # This method trains the network using the backpropagation algorithm.
+       #
+       # input: Network input
+       #
+       # output: Expected output for the given input.
+       #
+       # This method returns the network error:
+       # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+       def train(inputs, outputs)
+         eval(inputs)
+         backpropagate(outputs)
+         calculate_error(outputs)
+       end
+
+       # Initialize (or reset) activation nodes and weights, with the
+       # provided net structure and parameters.
+       def init_network
+         init_activation_nodes
+         init_weights
+         init_last_changes
+         return self
+       end
+
+       protected
+
+       # Custom serialization. Serialization used to fail because this class
+       # uses lambda functions internally, and lambdas cannot be serialized.
+       # Now it does not fail, but if you customized the values of
+       # * initial_weight_function
+       # * propagation_function
+       # * derivative_propagation_function
+       # you must restore their values manually after loading the instance.
+       def marshal_dump
+         [
+           @structure,
+           @disable_bias,
+           @learning_rate,
+           @momentum,
+           @weights,
+           @last_changes,
+           @activation_nodes
+         ]
+       end
+
+       def marshal_load(ary)
+         @structure,
+           @disable_bias,
+           @learning_rate,
+           @momentum,
+           @weights,
+           @last_changes,
+           @activation_nodes = ary
+         @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1 }
+         @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } # lambda { |x| Math.tanh(x) }
+         @derivative_propagation_function = lambda { |y| y*(1-y) } # lambda { |y| 1.0 - y**2 }
+       end
+
+       # Propagate error backwards
+       def backpropagate(expected_output_values)
+         check_output_dimension(expected_output_values.length)
+         calculate_output_deltas(expected_output_values)
+         calculate_internal_deltas
+         update_weights
+       end
+
+       # Propagate values forward
+       def feedforward(input_values)
+         input_values.each_index do |input_index|
+           @activation_nodes.first[input_index] = input_values[input_index]
+         end
+         @weights.each_index do |n|
+           @structure[n+1].times do |j|
+             sum = 0.0
+             @activation_nodes[n].each_index do |i|
+               sum += (@activation_nodes[n][i] * @weights[n][i][j])
+             end
+             @activation_nodes[n+1][j] = @propagation_function.call(sum)
+           end
+         end
+       end
+
+       # Initialize the neuron structure.
+       def init_activation_nodes
+         @activation_nodes = Array.new(@structure.length) do |n|
+           Array.new(@structure[n], 1.0)
+         end
+         if not disable_bias
+           @activation_nodes[0...-1].each { |layer| layer << 1.0 }
+         end
+       end
+
+       # Initialize the weight arrays using the function specified by the
+       # initial_weight_function parameter
+       def init_weights
+         @weights = Array.new(@structure.length-1) do |i|
+           nodes_origin = @activation_nodes[i].length
+           nodes_target = @structure[i+1]
+           Array.new(nodes_origin) do |j|
+             Array.new(nodes_target) do |k|
+               @initial_weight_function.call(i, j, k)
+             end
+           end
+         end
+       end
+
+       # Momentum usage needs to know how much a weight changed in the
+       # previous training iteration. This method initializes the
+       # @last_changes structure with 0 values.
+       def init_last_changes
+         @last_changes = Array.new(@weights.length) do |w|
+           Array.new(@weights[w].length) do |i|
+             Array.new(@weights[w][i].length, 0.0)
+           end
+         end
+       end
+
+       # Calculate deltas for the output layer
+       def calculate_output_deltas(expected_values)
+         output_values = @activation_nodes.last
+         output_deltas = []
+         output_values.each_index do |output_index|
+           error = expected_values[output_index] - output_values[output_index]
+           output_deltas << @derivative_propagation_function.call(
+             output_values[output_index]) * error
+         end
+         @deltas = [output_deltas]
+       end
+
+       # Calculate deltas for hidden layers
+       def calculate_internal_deltas
+         prev_deltas = @deltas.last
+         (@activation_nodes.length-2).downto(1) do |layer_index|
+           layer_deltas = []
+           @activation_nodes[layer_index].each_index do |j|
+             error = 0.0
+             @structure[layer_index+1].times do |k|
+               error += prev_deltas[k] * @weights[layer_index][j][k]
+             end
+             layer_deltas[j] = (@derivative_propagation_function.call(
+               @activation_nodes[layer_index][j]) * error)
+           end
+           prev_deltas = layer_deltas
+           @deltas.unshift(layer_deltas)
+         end
+       end
+
+       # Update weights after @deltas have been calculated.
+       def update_weights
+         (@weights.length-1).downto(0) do |n|
+           @weights[n].each_index do |i|
+             @weights[n][i].each_index do |j|
+               change = @deltas[n][j]*@activation_nodes[n][i]
+               @weights[n][i][j] += (learning_rate * change +
+                 momentum * @last_changes[n][i][j])
+               @last_changes[n][i][j] = change
+             end
+           end
+         end
+       end
+
+       # Calculate the quadratic error for an expected output value
+       # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+       def calculate_error(expected_output)
+         output_values = @activation_nodes.last
+         error = 0.0
+         expected_output.each_index do |output_index|
+           error +=
+             0.5*(output_values[output_index]-expected_output[output_index])**2
+         end
+         return error
+       end
+
+       def check_input_dimension(inputs)
+         raise ArgumentError, "Wrong number of inputs. " +
+           "Expected: #{@structure.first}, " +
+           "received: #{inputs}." if inputs != @structure.first
+       end
+
+       def check_output_dimension(outputs)
+         raise ArgumentError, "Wrong number of outputs. " +
+           "Expected: #{@structure.last}, " +
+           "received: #{outputs}." if outputs != @structure.last
+       end
+
+     end
+   end
+ end
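
A minimal usage sketch of the Backpropagation class above (this is not one of the gem's files): it assumes ai4r 1.11 is installed and loadable with require 'ai4r', and the XOR data, the [2, 3, 1] structure, and the epoch count are illustrative choices only, using just the API shown in the diff.

    require 'ai4r'

    # Learn XOR with 2 inputs, one hidden layer of 3 neurons, and 1 output.
    net = Ai4r::NeuralNetwork::Backpropagation.new([2, 3, 1])

    examples = [[0, 0], [0, 1], [1, 0], [1, 1]]
    expected = [[0], [1], [1], [0]]

    # train returns the quadratic error 0.5 * sum((expected - output)**2),
    # which should decrease as training progresses (convergence on XOR is
    # typical with these defaults, but not guaranteed).
    2000.times do
      examples.each_with_index { |input, i| net.train(input, expected[i]) }
    end

    examples.each { |input| puts "#{input.inspect} => #{net.eval(input).inspect}" }

    # Thanks to the custom marshal_dump/marshal_load above, a trained net can
    # be serialized; customized lambda parameters (initial_weight_function,
    # propagation_function, derivative_propagation_function) revert to their
    # defaults on load and must be re-assigned manually.
    copy = Marshal.load(Marshal.dump(net))
    puts copy.eval([1, 0]).inspect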
data/lib/ai4r/neural_network/hopfield.rb
@@ -0,0 +1,149 @@
+ # Author::    Sergio Fierens
+ # License::   MPL 1.1
+ # Project::   ai4r
+ # Url::       http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ require File.dirname(__FILE__) + '/../data/parameterizable'
+
+ module Ai4r
+
+   module NeuralNetwork
+
+     # = Hopfield Net =
+     #
+     # A Hopfield Network is a recurrent Artificial Neural Network.
+     # Hopfield nets are able to memorize a set of patterns, and then evaluate
+     # an input, returning the most similar stored pattern (although
+     # convergence to one of the stored patterns is not guaranteed).
+     # Hopfield nets are great for dealing with input noise. If a system
+     # accepts a discrete set of inputs, but the inputs are subject to noise,
+     # you can use a Hopfield net to eliminate the noise and identify the
+     # intended input.
+     #
+     # = How to Use =
+     #
+     #   data_set = Ai4r::Data::DataSet.new :data_items => array_of_patterns
+     #   net = Ai4r::NeuralNetwork::Hopfield.new.train data_set
+     #   net.eval input
+     #     => one of the stored patterns in array_of_patterns
+     class Hopfield
+
+       include Ai4r::Data::Parameterizable
+
+       attr_reader :weights, :nodes
+
+       parameters_info :eval_iterations => "The network will run for a maximum "+
+           "of 'eval_iterations' iterations while evaluating an input. 500 by " +
+           "default.",
+         :active_node_value => "Default: 1",
+         :inactive_node_value => "Default: -1",
+         :threshold => "Default: 0"
+
+       def initialize
+         @eval_iterations = 500
+         @active_node_value = 1
+         @inactive_node_value = -1
+         @threshold = 0
+       end
+
+       # Prepares the network to memorize the given data set.
+       # Future calls to eval (should) return one of the memorized data items.
+       # A Hopfield network converges to a local minimum, but convergence to
+       # one of the "memorized" patterns is not guaranteed.
+       def train(data_set)
+         @data_set = data_set
+         initialize_nodes(@data_set)
+         initialize_weights(@data_set)
+         return self
+       end
+
+       # You can use run instead of eval to propagate values step by step.
+       # With this you can verify the progress of the network output with
+       # each step.
+       #
+       # E.g.:
+       #   pattern = input
+       #   100.times do
+       #     pattern = net.run(pattern)
+       #     puts pattern.inspect
+       #   end
+       def run(input)
+         set_input(input)
+         propagate
+         return @nodes
+       end
+
+       # Propagates the input until the network returns one of the memorized
+       # patterns, or a maximum of "eval_iterations" times.
+       def eval(input)
+         set_input(input)
+         @eval_iterations.times do
+           propagate
+           break if @data_set.data_items.include?(@nodes)
+         end
+         return @nodes
+       end
+
+       protected
+
+       # Set all node states to the given input.
+       # The inputs parameter must have the same dimension as nodes.
+       def set_input(inputs)
+         raise ArgumentError unless inputs.length == @nodes.length
+         inputs.each_with_index { |input, i| @nodes[i] = input }
+       end
+
+       # Select a single node randomly and update its state based on the
+       # state of all other nodes.
+       def propagate
+         sum = 0
+         i = (rand * @nodes.length).floor
+         @nodes.each_with_index { |node, j| sum += read_weight(i, j)*node }
+         @nodes[i] = (sum > @threshold) ? @active_node_value : @inactive_node_value
+       end
+
+       # Initialize all nodes with the "inactive" state.
+       def initialize_nodes(data_set)
+         @nodes = Array.new(data_set.data_items.first.length,
+           @inactive_node_value)
+       end
+
+       # Create a partial weight matrix:
+       #   [
+       #     [w(1,0)],
+       #     [w(2,0)], [w(2,1)],
+       #     [w(3,0)], [w(3,1)], [w(3,2)],
+       #     ...
+       #     [w(n-1,0)], [w(n-1,1)], [w(n-1,2)], ..., [w(n-1,n-2)]
+       #   ]
+       # where n is the number of nodes.
+       #
+       # We are saving memory here, as:
+       #
+       # * w[i][i] = 0 (no node connects with itself)
+       # * w[i][j] = w[j][i] (weights are symmetric)
+       #
+       # Use read_weight(i,j) to find the weight between node i and node j
+       def initialize_weights(data_set)
+         @weights = Array.new(@nodes.length-1) { |l| Array.new(l+1) }
+         @nodes.each_index do |i|
+           i.times do |j|
+             @weights[i-1][j] = data_set.data_items.inject(0) { |sum, item| sum + item[i]*item[j] }
+           end
+         end
+       end
+
+       # read_weight(i,j) reads the weight matrix and returns the weight
+       # between node i and node j
+       def read_weight(index_a, index_b)
+         return 0 if index_a == index_b
+         index_a, index_b = index_b, index_a if index_b > index_a
+         return @weights[index_a-1][index_b]
+       end
+
+     end
+
+   end
+
+ end
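
Likewise, a minimal sketch of the Hopfield API above (not one of the gem's files): it assumes ai4r 1.11 is installed, and the two 10-node patterns and the noisy input are made-up illustrative data.

    require 'ai4r'

    # Memorize two 10-node patterns. The values match the defaults:
    # active_node_value 1, inactive_node_value -1.
    patterns = [
      [1, 1, 1, 1, 1, -1, -1, -1, -1, -1],
      [-1, -1, -1, -1, -1, 1, 1, 1, 1, 1]
    ]
    data_set = Ai4r::Data::DataSet.new(:data_items => patterns)
    net = Ai4r::NeuralNetwork::Hopfield.new.train(data_set)

    # The first pattern with two bits flipped. eval updates randomly chosen
    # nodes until the state matches a stored pattern or eval_iterations is
    # exhausted, so recovering the original pattern is likely, not guaranteed.
    noisy = [1, -1, 1, 1, 1, -1, -1, 1, -1, -1]
    puts net.eval(noisy).inspect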