ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
@@ -1,173 +1,354 @@
+ # frozen_string_literal: true
+
  # Author:: Sergio Fierens
  # License:: MPL 1.1
  # Project:: ai4r
- # Url:: http://ai4r.org/
+ # Url:: https://github.com/SergioFierens/ai4r
  #
- # You can redistribute it and/or modify it under the terms of
- # the Mozilla Public License version 1.1 as published by the
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

- require File.dirname(__FILE__) + '/../data/parameterizable'
+ require_relative '../data/parameterizable'
+ require_relative 'activation_functions'
+ require_relative 'weight_initializations'

  module Ai4r
-
-   # Artificial Neural Networks are mathematical or computational models based on
-   # biological neural networks.
-   #
+   # Artificial Neural Networks are mathematical or computational models based on
+   # biological neural networks.
+   #
    # More about neural networks:
-   #
+   #
    # * http://en.wikipedia.org/wiki/Artificial_neural_network
    #
    module NeuralNetwork
-
      # = Introduction
-     #
+     #
      # This is an implementation of a multilayer perceptron network, using
      # the backpropagation algorithm for learning.
-     #
-     # Backpropagation is a supervised learning technique (described
-     # by Paul Werbos in 1974, and further developed by David E.
+     #
+     # Backpropagation is a supervised learning technique (described
+     # by Paul Werbos in 1974, and further developed by David E.
      # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
-     #
+     #
      # = Features
-     #
+     #
      # * Support for any network architecture (number of layers and neurons)
      # * Configurable propagation function
-     # * Optional usage of bias
+     # * Optional usage of bias
      # * Configurable momentum
      # * Configurable learning rate
      # * Configurable initial weight function
      # * 100% ruby code, no external dependency
-     #
+     #
      # = Parameters
-     #
+     #
      # Use class method get_parameters_info to obtain details on the algorithm
      # parameters. Use set_parameters to set values for this parameters.
-     #
+     #
      # * :disable_bias => If true, the algorithm will not use bias nodes.
      #   False by default.
-     # * :initial_weight_function => f(n, i, j) must return the initial
-     #   weight for the conection between the node i in layer n, and node j in
+     # * :initial_weight_function => f(n, i, j) must return the initial
+     #   weight for the conection between the node i in layer n, and node j in
      #   layer n+1. By default a random number in [-1, 1) range.
-     # * :propagation_function => By default:
+     # * :propagation_function => By default:
      #   lambda { |x| 1/(1+Math.exp(-1*(x))) }
-     # * :derivative_propagation_function => Derivative of the propagation
-     #   function, based on propagation function output.
+     # * :derivative_propagation_function => Derivative of the propagation
+     #   function, based on propagation function output.
      #   By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
-     # * :learning_rate => By default 0.25
+     # * :activation => Built-in activation name (:sigmoid, :tanh or :relu).
+     #   Selecting this overrides propagation_function and derivative_propagation_function.
+     #   Default: :sigmoid
+     # * :learning_rate => By default 0.25
      # * :momentum => By default 0.1. Set this parameter to 0 to disable
      #   momentum
-     #
+     #
      # = How to use it
-     #
+     #
      #   # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
      #   # and 2 outputs
-     #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+     #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
      #
-     #   # Train the network
+     #   # Train the network
      #   1000.times do |i|
      #     net.train(example[i], result[i])
      #   end
-     #
+     #
      #   # Use it: Evaluate data with the trained network
-     #   net.eval([12, 48, 12, 25])
-     #     => [0.86, 0.01]
-     #
+     #   net.eval([12, 48, 12, 25])
+     #     => [0.86, 0.01]
+     #
      # More about multilayer perceptron neural networks and backpropagation:
-     #
+     #
      # * http://en.wikipedia.org/wiki/Backpropagation
      # * http://en.wikipedia.org/wiki/Multilayer_perceptron
-     #
+     #
      # = About the project
      # Author:: Sergio Fierens
      # License:: MPL 1.1
-     # Url:: http://ai4r.org
+     # Url:: https://github.com/SergioFierens/ai4r
      class Backpropagation
-
        include Ai4r::Data::Parameterizable
-
-       parameters_info :disable_bias => "If true, the algorithm will not use "+
-         "bias nodes. False by default.",
-         :initial_weight_function => "f(n, i, j) must return the initial "+
-         "weight for the conection between the node i in layer n, and "+
-         "node j in layer n+1. By default a random number in [-1, 1) range.",
-         :propagation_function => "By default: " +
-         "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
-         :derivative_propagation_function => "Derivative of the propagation "+
-         "function, based on propagation function output. By default: " +
-         "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
-         :learning_rate => "By default 0.25",
-         :momentum => "By default 0.1. Set this parameter to 0 to disable "+
-         "momentum."
-
+
        attr_accessor :structure, :weights, :activation_nodes, :last_changes
-
+
+       # When the activation parameter changes, update internal lambdas for each
+       # layer. Accepts a single symbol or an array of symbols (one for each
+       # layer except the input layer).
+       # @param symbols [Object]
+       # @return [Object]
+       def activation=(symbols)
+         symbols = [symbols] unless symbols.is_a?(Array)
+         layer_count = @structure.length - 1
+         if symbols.length == 1
+           symbols = Array.new(layer_count, symbols.first)
+         elsif symbols.length != layer_count
+           raise ArgumentError, "Activation array size must match number of layers (#{layer_count})"
+         end
+         @activation = symbols
+         @propagation_functions = @activation.map do |a|
+           Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[a] ||
+             Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[:sigmoid]
+         end
+         @derivative_functions = @activation.map do |a|
+           Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[a] ||
+             Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[:sigmoid]
+         end
+       end
+
+       # @return [Object]
+       def activation
+         if @activation.is_a?(Array)
+           if @set_by_loss || (@loss_function == :cross_entropy && @activation_overridden)
+             @activation.first
+           else
+             @activation
+           end
+         else
+           @activation
+         end
+       end
+
+       # @param symbol [Object]
+       # @return [Object]
+       def weight_init=(symbol)
+         @weight_init = symbol
+         @initial_weight_function = case symbol
+                                    when :xavier
+                                      Ai4r::NeuralNetwork::WeightInitializations.xavier(@structure)
+                                    when :he
+                                      Ai4r::NeuralNetwork::WeightInitializations.he(@structure)
+                                    else
+                                      Ai4r::NeuralNetwork::WeightInitializations.uniform
+                                    end
+       end
+
+       # @param symbol [Object]
+       # @return [Object]
+       def loss_function=(symbol)
+         @loss_function = symbol
+         return unless symbol == :cross_entropy && !@activation_overridden && !@custom_propagation
+
+         @set_by_loss = true
+         self.activation = :softmax
+         @activation_overridden = false
+       end
+
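Note: the activation=, weight_init= and loss_function= setters above replace the fixed sigmoid and uniform-random lambdas of 1.x. A minimal usage sketch based only on the setters shown in this hunk (the actual lambdas live in the new activation_functions.rb and weight_initializations.rb files, not reproduced here):

    net = Ai4r::NeuralNetwork::Backpropagation.new([3, 4, 2])
    net.weight_init = :he              # :uniform (default), :xavier or :he
    net.activation = %i[relu sigmoid]  # one symbol per non-input layer

Passing a single symbol applies the same activation to every layer; an array of the wrong length raises ArgumentError, as the setter shows.
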
        # Creates a new network specifying the its architecture.
        # E.g.
-       #
+       #
        #   net = Backpropagation.new([4, 3, 2])  # 4 inputs
-       #     # 1 hidden layer with 3 neurons,
-       #     # 2 outputs
+       #     # 1 hidden layer with 3 neurons,
+       #     # 2 outputs
        #   net = Backpropagation.new([2, 3, 3, 4])  # 2 inputs
-       #     # 2 hidden layer with 3 neurons each,
-       #     # 4 outputs
+       #     # 2 hidden layer with 3 neurons each,
+       #     # 4 outputs
        #   net = Backpropagation.new([2, 1])  # 2 inputs
        #     # No hidden layer
-       #     # 1 output
-       def initialize(network_structure)
+       #     # 1 output
+       # @param network_structure [Object]
+       # @param activation [Object]
+       # @param weight_init [Object]
+       # @return [Object]
+       def initialize(network_structure, activation = :sigmoid, weight_init = :uniform)
          @structure = network_structure
-         @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
-         @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
-         @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
+         self.weight_init = weight_init
+         @custom_propagation = false
+         @set_by_loss = true
+         self.activation = activation
+         @activation_overridden = (activation != :sigmoid)
+         @set_by_loss = false
          @disable_bias = false
          @learning_rate = 0.25
          @momentum = 0.1
+         @loss_function = :mse
        end
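Note: the constructor now takes optional positional activation and weight_init arguments in addition to the layer structure. A short sketch of the documented call (set_parameters comes from Parameterizable, as the class comment above states):

    # 2 inputs, one hidden layer of 3 neurons, 1 output,
    # tanh activation and Xavier-initialised weights.
    net = Ai4r::NeuralNetwork::Backpropagation.new([2, 3, 1], :tanh, :xavier)
    net.set_parameters(learning_rate: 0.1, momentum: 0.0)
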
 
-       # Evaluates the input.
-       # E.g.
-       #   net = Backpropagation.new([4, 3, 2])
        #   net.eval([25, 32.3, 12.8, 1.5])
        #   # => [0.83, 0.03]
+       # @param input_values [Object]
+       # @return [Object]
        def eval(input_values)
          check_input_dimension(input_values.length)
-         init_network if !@weights
+         init_network unless @weights
          feedforward(input_values)
-         return @activation_nodes.last.clone
+         @activation_nodes.last.clone
        end
-
+
        # Evaluates the input and returns most active node
        # E.g.
        #   net = Backpropagation.new([4, 3, 2])
        #   net.eval_result([25, 32.3, 12.8, 1.5])
        #   # eval gives [0.83, 0.03]
        #   # => 0
+       # @param input_values [Object]
+       # @return [Object]
        def eval_result(input_values)
          result = eval(input_values)
          result.index(result.max)
        end
-
+
        # This method trains the network using the backpropagation algorithm.
-       #
+       #
        # input: Networks input
-       #
+       #
        # output: Expected output for the given input.
        #
-       # This method returns the network error:
-       # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+       # This method returns the training loss according to +loss_function+.
+       # @param inputs [Object]
+       # @param outputs [Object]
+       # @return [Object]
        def train(inputs, outputs)
          eval(inputs)
          backpropagate(outputs)
-         calculate_error(outputs)
+         calculate_loss(outputs, @activation_nodes.last)
+       end
+
+       # Train a list of input/output pairs and return average loss.
+       # @param batch_inputs [Object]
+       # @param batch_outputs [Object]
+       # @return [Object]
+       def train_batch(batch_inputs, batch_outputs)
+         if batch_inputs.length != batch_outputs.length
+           raise ArgumentError,
+                 'Inputs and outputs size mismatch'
+         end
+
+         batch_size = batch_inputs.length
+         init_network unless @weights
+
+         accumulated_changes = Array.new(@weights.length) do |w|
+           Array.new(@weights[w].length) do |i|
+             Array.new(@weights[w][i].length, 0.0)
+           end
+         end
+
+         sum_error = 0.0
+         batch_inputs.each_index do |idx|
+           inputs = batch_inputs[idx]
+           outputs = batch_outputs[idx]
+           eval(inputs)
+           calculate_output_deltas(outputs)
+           calculate_internal_deltas
+
+           (@weights.length - 1).downto(0) do |n|
+             @weights[n].each_index do |i|
+               @weights[n][i].each_index do |j|
+                 change = @deltas[n][j] * @activation_nodes[n][i]
+                 accumulated_changes[n][i][j] += change
+               end
+             end
+           end
+
+           sum_error += calculate_loss(outputs, @activation_nodes.last)
+         end
+
+         (@weights.length - 1).downto(0) do |n|
+           @weights[n].each_index do |i|
+             @weights[n][i].each_index do |j|
+               avg_change = accumulated_changes[n][i][j] / batch_size.to_f
+               @weights[n][i][j] += (learning_rate * avg_change) + (momentum * @last_changes[n][i][j])
+               @last_changes[n][i][j] = avg_change
+             end
+           end
+         end
+
+         sum_error / batch_size.to_f
+       end
+
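Note: train now returns whatever calculate_loss computes for the configured loss_function, and the new train_batch accumulates the deltas of a whole batch and applies a single averaged, momentum-smoothed weight update, returning the mean loss. A rough XOR sketch (convergence depends on the random initial weights):

    xor_inputs  = [[0, 0], [0, 1], [1, 0], [1, 1]]
    xor_outputs = [[1, 0], [0, 1], [0, 1], [1, 0]]  # one-hot: [even, odd]

    net = Ai4r::NeuralNetwork::Backpropagation.new([2, 3, 2])
    2000.times { net.train_batch(xor_inputs, xor_outputs) }  # one averaged update per pass
    net.eval_result([0, 1])  # => 1 once the network has converged
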
+       # Train for a number of epochs over the dataset. Optionally define a batch size.
+       # Data can be shuffled between epochs passing +shuffle: true+ (default).
+       # Use +random_seed+ to make shuffling deterministic.
+       # Returns an array with the average loss of each epoch.
+       # @return [Object]
+       def train_epochs(data_inputs, data_outputs, epochs:, batch_size: 1,
+                        early_stopping_patience: nil, min_delta: 0.0,
+                        shuffle: true, random_seed: nil, &block)
+         if data_inputs.length != data_outputs.length
+           raise ArgumentError,
+                 'Inputs and outputs size mismatch'
+         end
+
+         losses = []
+         best_loss = Float::INFINITY
+         patience = early_stopping_patience
+         patience_counter = 0
+         rng = random_seed.nil? ? Random.new : Random.new(random_seed)
+         epochs.times do |epoch|
+           epoch_error = 0.0
+           epoch_inputs = data_inputs
+           epoch_outputs = data_outputs
+           if shuffle
+             indices = (0...data_inputs.length).to_a.shuffle(random: rng)
+             epoch_inputs = data_inputs.values_at(*indices)
+             epoch_outputs = data_outputs.values_at(*indices)
+           end
+           index = 0
+           while index < epoch_inputs.length
+             batch_in = epoch_inputs[index, batch_size]
+             batch_out = epoch_outputs[index, batch_size]
+             batch_error = train_batch(batch_in, batch_out)
+             epoch_error += batch_error * batch_in.length
+             index += batch_size
+           end
+           epoch_loss = epoch_error / data_inputs.length.to_f
+           losses << epoch_loss
+           if block
+             if block.arity >= 3
+               correct = 0
+               data_inputs.each_index do |i|
+                 output = eval(data_inputs[i])
+                 predicted = output.index(output.max)
+                 expected = data_outputs[i].index(data_outputs[i].max)
+                 correct += 1 if predicted == expected
+               end
+               accuracy = correct.to_f / data_inputs.length
+               block.call(epoch, epoch_loss, accuracy)
+             else
+               block.call(epoch, epoch_loss)
+             end
+           end
+           if patience
+             if best_loss - epoch_loss > min_delta
+               best_loss = epoch_loss
+               patience_counter = 0
+             else
+               patience_counter += 1
+               break if patience_counter >= patience
+             end
+           end
+         end
+         losses
        end
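Note: train_epochs wraps train_batch in an epoch loop with optional shuffling, a deterministic random_seed, early stopping (early_stopping_patience and min_delta) and a progress callback; a block with three parameters also receives accuracy, computed by re-evaluating the whole dataset. Continuing the XOR sketch above (or on a freshly created network):

    losses = net.train_epochs(xor_inputs, xor_outputs,
                              epochs: 500, batch_size: 2, random_seed: 42,
                              early_stopping_patience: 20, min_delta: 1.0e-4) do |epoch, loss, acc|
      puts "epoch #{epoch}: loss=#{loss.round(4)} accuracy=#{acc.round(2)}"
    end
    losses.last  # average loss of the last epoch that ran
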
-
-       # Initialize (or reset) activation nodes and weights, with the
+
+       # Initialize (or reset) activation nodes and weights, with the
        # provided net structure and parameters.
+       # @return [Object]
        def init_network
          init_activation_nodes
          init_weights
          init_last_changes
-         return self
+         self
        end

        protected
@@ -179,6 +360,7 @@ module Ai4r
        # * propagation_function
        # * derivative_propagation_function
        # you must restore their values manually after loading the instance.
+       # @return [Object]
        def marshal_dump
          [
            @structure,
@@ -187,10 +369,13 @@ module Ai4r
            @momentum,
            @weights,
            @last_changes,
-           @activation_nodes
+           @activation_nodes,
+           @activation
          ]
        end

+       # @param ary [Object]
+       # @return [Object]
        def marshal_load(ary)
          @structure,
          @disable_bias,
@@ -198,140 +383,220 @@ module Ai4r
          @momentum,
          @weights,
          @last_changes,
-         @activation_nodes = ary
-         @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
-         @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
-         @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
+         @activation_nodes,
+         @activation = ary
+         self.weight_init = :uniform
+         self.activation = @activation || :sigmoid
        end
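Note: marshal_dump now also stores the activation symbols, and marshal_load rebuilds the activation lambdas and resets weight_init to :uniform; as the comment above marshal_dump states, custom propagation_function, derivative_propagation_function or initial_weight_function lambdas still have to be restored by hand. A round-trip sketch:

    blob     = Marshal.dump(net)   # the lambdas themselves are not serialised
    restored = Marshal.load(blob)  # structure, weights, momentum and activation symbols survive
    restored.weight_init = :he     # reapply a non-default weight_init only if you will call init_network again
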
 
-
        # Propagate error backwards
+       # @param expected_output_values [Object]
+       # @return [Object]
        def backpropagate(expected_output_values)
          check_output_dimension(expected_output_values.length)
          calculate_output_deltas(expected_output_values)
          calculate_internal_deltas
          update_weights
        end
-
+
        # Propagate values forward
+       # @param input_values [Object]
+       # @return [Object]
        def feedforward(input_values)
-         input_values.each_index do |input_index|
+         input_values.each_index do |input_index|
            @activation_nodes.first[input_index] = input_values[input_index]
          end
          @weights.each_index do |n|
-           @structure[n+1].times do |j|
-             sum = 0.0
+           sums = Array.new(@structure[n + 1], 0.0)
+           @structure[n + 1].times do |j|
              @activation_nodes[n].each_index do |i|
-               sum += (@activation_nodes[n][i] * @weights[n][i][j])
+               sums[j] += (@activation_nodes[n][i] * @weights[n][i][j])
              end
-             @activation_nodes[n+1][j] = @propagation_function.call(sum)
            end
-         end
+           if @activation[n] == :softmax
+             values = @propagation_functions[n].call(sums)
+             values.each_index { |j| @activation_nodes[n + 1][j] = values[j] }
+           else
+             sums.each_index do |j|
+               @activation_nodes[n + 1][j] = @propagation_functions[n].call(sums[j])
+             end
+           end
+         end
        end
-
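Note: feedforward now collects each layer's weighted sums into an array first; for a :softmax layer the whole vector is handed to the activation in one call, so the outputs are normalised against each other, while every other activation is still applied element-wise. For illustration only, a plain-Ruby softmax over a layer's sums (the library's own lambda lives in ActivationFunctions::FUNCTIONS[:softmax] and may differ in detail):

    def softmax(sums)
      max   = sums.max                           # shift by the max for numerical stability
      exps  = sums.map { |s| Math.exp(s - max) }
      total = exps.sum
      exps.map { |e| e / total }                 # values sum to 1.0
    end

    softmax([2.0, 1.0, 0.1])  # => approximately [0.66, 0.24, 0.10]
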
+
        # Initialize neurons structure.
+       # @return [Object]
        def init_activation_nodes
-         @activation_nodes = Array.new(@structure.length) do |n|
+         @activation_nodes = Array.new(@structure.length) do |n|
            Array.new(@structure[n], 1.0)
          end
-         if not disable_bias
-           @activation_nodes[0...-1].each {|layer| layer << 1.0 }
-         end
+         return if disable_bias
+
+         @activation_nodes[0...-1].each { |layer| layer << 1.0 }
        end
-
+
        # Initialize the weight arrays using function specified with the
        # initial_weight_function parameter
+       # @return [Object]
        def init_weights
-         @weights = Array.new(@structure.length-1) do |i|
+         @weights = Array.new(@structure.length - 1) do |i|
            nodes_origin = @activation_nodes[i].length
-           nodes_target = @structure[i+1]
+           nodes_target = @structure[i + 1]
            Array.new(nodes_origin) do |j|
-             Array.new(nodes_target) do |k|
+             Array.new(nodes_target) do |k|
                @initial_weight_function.call(i, j, k)
              end
            end
          end
-       end
+       end

-       # Momentum usage need to know how much a weight changed in the
-       # previous training. This method initialize the @last_changes
+       # Momentum usage need to know how much a weight changed in the
+       # previous training. This method initialize the @last_changes
        # structure with 0 values.
+       # @return [Object]
        def init_last_changes
          @last_changes = Array.new(@weights.length) do |w|
-           Array.new(@weights[w].length) do |i|
+           Array.new(@weights[w].length) do |i|
              Array.new(@weights[w][i].length, 0.0)
            end
          end
        end
-
+
        # Calculate deltas for output layer
+       # @param expected_values [Object]
+       # @return [Object]
        def calculate_output_deltas(expected_values)
          output_values = @activation_nodes.last
          output_deltas = []
+         func = @derivative_functions.last
          output_values.each_index do |output_index|
-           error = expected_values[output_index] - output_values[output_index]
-           output_deltas << @derivative_propagation_function.call(
-             output_values[output_index]) * error
+           if @loss_function == :cross_entropy && @activation == :softmax
+             output_deltas << (output_values[output_index] - expected_values[output_index])
+           else
+             error = expected_values[output_index] - output_values[output_index]
+             output_deltas << (func.call(output_values[output_index]) * error)
+           end
          end
          @deltas = [output_deltas]
        end
-
+
        # Calculate deltas for hidden layers
+       # @return [Object]
        def calculate_internal_deltas
          prev_deltas = @deltas.last
-         (@activation_nodes.length-2).downto(1) do |layer_index|
+         (@activation_nodes.length - 2).downto(1) do |layer_index|
            layer_deltas = []
            @activation_nodes[layer_index].each_index do |j|
              error = 0.0
-             @structure[layer_index+1].times do |k|
+             @structure[layer_index + 1].times do |k|
                error += prev_deltas[k] * @weights[layer_index][j][k]
              end
-             layer_deltas[j] = (@derivative_propagation_function.call(
-               @activation_nodes[layer_index][j]) * error)
+             func = @derivative_functions[layer_index - 1]
+             layer_deltas[j] = func.call(@activation_nodes[layer_index][j]) * error
            end
            prev_deltas = layer_deltas
            @deltas.unshift(layer_deltas)
          end
        end
-
+
        # Update weights after @deltas have been calculated.
+       # @return [Object]
        def update_weights
-         (@weights.length-1).downto(0) do |n|
-           @weights[n].each_index do |i|
-             @weights[n][i].each_index do |j|
-               change = @deltas[n][j]*@activation_nodes[n][i]
-               @weights[n][i][j] += ( learning_rate * change +
-                 momentum * @last_changes[n][i][j])
+         (@weights.length - 1).downto(0) do |n|
+           @weights[n].each_index do |i|
+             @weights[n][i].each_index do |j|
+               change = @deltas[n][j] * @activation_nodes[n][i]
+               @weights[n][i][j] += ((learning_rate * change) +
+                                     (momentum * @last_changes[n][i][j]))
                @last_changes[n][i][j] = change
              end
            end
          end
        end
-
-       # Calculate quadratic error for a expected output value
+
+       # Calculate quadratic error for an expected output value
        # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+       # @param expected_output [Object]
+       # @return [Object]
        def calculate_error(expected_output)
          output_values = @activation_nodes.last
          error = 0.0
          expected_output.each_index do |output_index|
-           error +=
-             0.5*(output_values[output_index]-expected_output[output_index])**2
+           error +=
+             0.5 * ((output_values[output_index] - expected_output[output_index])**2)
+         end
+         error
+       end
+
+       # Calculate loss for expected/actual vectors according to selected
+       # loss_function (:mse or :cross_entropy).
+       # @param expected [Object]
+       # @param actual [Object]
+       # @return [Object]
+       def calculate_loss(expected, actual)
+         case @loss_function
+         when :cross_entropy
+           epsilon = 1e-12
+           loss = 0.0
+           if @activation == :softmax
+             expected.each_index do |i|
+               p = [[actual[i], epsilon].max, 1 - epsilon].min
+               loss -= expected[i] * Math.log(p)
+             end
+           else
+             expected.each_index do |i|
+               p = [[actual[i], epsilon].max, 1 - epsilon].min
+               loss -= (expected[i] * Math.log(p)) + ((1 - expected[i]) * Math.log(1 - p))
+             end
+           end
+           loss
+         else
+           # Mean squared error
+           error = 0.0
+           expected.each_index do |i|
+             error += 0.5 * ((expected[i] - actual[i])**2)
+           end
+           error
          end
-         return error
        end
-
+
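Note: calculate_loss keeps the old halved squared error for :mse (so train still returns the same number 1.x did) and otherwise computes cross-entropy with the outputs clamped to the [1e-12, 1 - 1e-12] range; together with the shortcut in calculate_output_deltas (delta = output - expected), this is the usual simplification of the softmax-plus-cross-entropy gradient. A small worked example of the two losses for one sample:

    expected = [0.0, 1.0, 0.0]
    actual   = [0.2, 0.7, 0.1]

    # cross-entropy with softmax outputs: -sum(expected * log(actual))
    #   => -Math.log(0.7) ≈ 0.357
    # mse as defined above: 0.5 * ((0.2)**2 + (0.3)**2 + (0.1)**2)
    #   => 0.07
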
+       # @param inputs [Object]
+       # @return [Object]
        def check_input_dimension(inputs)
-         raise ArgumentError, "Wrong number of inputs. " +
-           "Expected: #{@structure.first}, " +
-           "received: #{inputs}." if inputs!=@structure.first
+         return unless inputs != @structure.first
+
+         raise ArgumentError, 'Wrong number of inputs. ' \
+                              "Expected: #{@structure.first}, " \
+                              "received: #{inputs}."
        end

+       # @param outputs [Object]
+       # @return [Object]
        def check_output_dimension(outputs)
-         raise ArgumentError, "Wrong number of outputs. " +
-           "Expected: #{@structure.last}, " +
-           "received: #{outputs}." if outputs!=@structure.last
+         return unless outputs != @structure.last
+
+         raise ArgumentError, 'Wrong number of outputs. ' \
+                              "Expected: #{@structure.last}, " \
+                              "received: #{outputs}."
        end
-
+
+       parameters_info disable_bias: 'If true, the algorithm will not use ' \
+                         'bias nodes. False by default.',
+                       initial_weight_function: 'f(n, i, j) must return the initial ' \
+                         'weight for the conection between the node i in layer n, and ' \
+                         'node j in layer n+1. By default a random number in [-1, 1) range.',
+                       weight_init: 'Built-in weight initialization strategy (:uniform, :xavier or :he). Default: :uniform',
+                       propagation_function: 'By default: ' \
+                         'lambda { |x| 1/(1+Math.exp(-1*(x))) }',
+                       derivative_propagation_function: 'Derivative of the propagation ' \
+                         'function, based on propagation function output. By default: ' \
+                         'lambda { |y| y*(1-y) }, where y=propagation_function(x)',
+                       activation: 'Activation function per layer. Provide a symbol or an array of symbols (:sigmoid, :tanh, :relu or :softmax). Default: :sigmoid',
+                       learning_rate: 'By default 0.25',
+                       momentum: 'By default 0.1. Set this parameter to 0 to disable ' \
+                         'momentum.',
+                       loss_function: 'Loss function used when training (:mse or ' \
+                         ':cross_entropy). Default: :mse'
      end
    end
  end