ai4r 1.12 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128)
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.csv +159 -0
  16. data/examples/classifiers/simple_linear_regression_example.rb +18 -0
  17. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  18. data/examples/classifiers/zero_one_r_data.csv +8 -0
  19. data/examples/clusterers/clusterer_example.rb +62 -0
  20. data/examples/clusterers/dbscan_example.rb +17 -0
  21. data/examples/clusterers/dendrogram_example.rb +17 -0
  22. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  23. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  24. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  25. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  26. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  27. data/examples/neural_network/backpropagation_example.rb +49 -48
  28. data/examples/neural_network/hopfield_example.rb +45 -0
  29. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  30. data/examples/neural_network/patterns_with_noise.rb +41 -39
  31. data/examples/neural_network/train_epochs_callback.rb +25 -0
  32. data/examples/neural_network/training_patterns.rb +39 -39
  33. data/examples/neural_network/transformer_text_classification.rb +78 -0
  34. data/examples/neural_network/xor_example.rb +23 -22
  35. data/examples/reinforcement/q_learning_example.rb +10 -0
  36. data/examples/som/som_data.rb +155 -152
  37. data/examples/som/som_multi_node_example.rb +12 -13
  38. data/examples/som/som_single_example.rb +12 -15
  39. data/examples/transformer/decode_classifier_example.rb +68 -0
  40. data/examples/transformer/deterministic_example.rb +10 -0
  41. data/examples/transformer/seq2seq_example.rb +16 -0
  42. data/lib/ai4r/classifiers/classifier.rb +24 -16
  43. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  44. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  45. data/lib/ai4r/classifiers/ib1.rb +122 -32
  46. data/lib/ai4r/classifiers/id3.rb +527 -144
  47. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  48. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  49. data/lib/ai4r/classifiers/naive_bayes.rb +112 -48
  50. data/lib/ai4r/classifiers/one_r.rb +112 -44
  51. data/lib/ai4r/classifiers/prism.rb +167 -76
  52. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  53. data/lib/ai4r/classifiers/simple_linear_regression.rb +143 -0
  54. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  55. data/lib/ai4r/classifiers/votes.rb +57 -0
  56. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  57. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  58. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  59. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  60. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  61. data/lib/ai4r/clusterers/clusterer.rb +28 -24
  62. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  63. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  64. data/lib/ai4r/clusterers/diana.rb +75 -49
  65. data/lib/ai4r/clusterers/k_means.rb +309 -72
  66. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  67. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  68. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  69. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +63 -0
  70. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  71. data/lib/ai4r/data/data_set.rb +229 -100
  72. data/lib/ai4r/data/parameterizable.rb +31 -25
  73. data/lib/ai4r/data/proximity.rb +72 -50
  74. data/lib/ai4r/data/statistics.rb +46 -35
  75. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  76. data/lib/ai4r/experiment/split.rb +39 -0
  77. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  78. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  79. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  80. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  81. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  82. data/lib/ai4r/neural_network/backpropagation.rb +419 -143
  83. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  84. data/lib/ai4r/neural_network/transformer.rb +194 -0
  85. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  86. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  87. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  88. data/lib/ai4r/search/a_star.rb +76 -0
  89. data/lib/ai4r/search/bfs.rb +50 -0
  90. data/lib/ai4r/search/dfs.rb +50 -0
  91. data/lib/ai4r/search/mcts.rb +118 -0
  92. data/lib/ai4r/search.rb +12 -0
  93. data/lib/ai4r/som/distance_metrics.rb +29 -0
  94. data/lib/ai4r/som/layer.rb +28 -17
  95. data/lib/ai4r/som/node.rb +61 -32
  96. data/lib/ai4r/som/som.rb +158 -41
  97. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  98. data/lib/ai4r/version.rb +3 -0
  99. data/lib/ai4r.rb +58 -27
  100. metadata +117 -106
  101. data/README.rdoc +0 -44
  102. data/test/classifiers/hyperpipes_test.rb +0 -84
  103. data/test/classifiers/ib1_test.rb +0 -78
  104. data/test/classifiers/id3_test.rb +0 -208
  105. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  106. data/test/classifiers/naive_bayes_test.rb +0 -43
  107. data/test/classifiers/one_r_test.rb +0 -62
  108. data/test/classifiers/prism_test.rb +0 -85
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -100
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_test.rb +0 -53
  119. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  120. data/test/data/data_set_test.rb +0 -96
  121. data/test/data/proximity_test.rb +0 -81
  122. data/test/data/statistics_test.rb +0 -65
  123. data/test/experiment/classifier_evaluator_test.rb +0 -76
  124. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  125. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  126. data/test/neural_network/backpropagation_test.rb +0 -82
  127. data/test/neural_network/hopfield_test.rb +0 -72
  128. data/test/som/som_test.rb +0 -97
@@ -1,162 +1,354 @@
+ # frozen_string_literal: true
+
  # Author:: Sergio Fierens
  # License:: MPL 1.1
  # Project:: ai4r
- # Url:: http://ai4r.org/
+ # Url:: https://github.com/SergioFierens/ai4r
  #
- # You can redistribute it and/or modify it under the terms of
- # the Mozilla Public License version 1.1 as published by the
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

- require File.dirname(__FILE__) + '/../data/parameterizable'
+ require_relative '../data/parameterizable'
+ require_relative 'activation_functions'
+ require_relative 'weight_initializations'

  module Ai4r
-
- # Artificial Neural Networks are mathematical or computational models based on
- # biological neural networks.
- #
+ # Artificial Neural Networks are mathematical or computational models based on
+ # biological neural networks.
+ #
  # More about neural networks:
- #
+ #
  # * http://en.wikipedia.org/wiki/Artificial_neural_network
  #
  module NeuralNetwork
-
  # = Introduction
- #
+ #
  # This is an implementation of a multilayer perceptron network, using
  # the backpropagation algorithm for learning.
- #
- # Backpropagation is a supervised learning technique (described
- # by Paul Werbos in 1974, and further developed by David E.
+ #
+ # Backpropagation is a supervised learning technique (described
+ # by Paul Werbos in 1974, and further developed by David E.
  # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
- #
+ #
  # = Features
- #
+ #
  # * Support for any network architecture (number of layers and neurons)
  # * Configurable propagation function
- # * Optional usage of bias
+ # * Optional usage of bias
  # * Configurable momentum
  # * Configurable learning rate
  # * Configurable initial weight function
  # * 100% ruby code, no external dependency
- #
+ #
  # = Parameters
- #
+ #
  # Use class method get_parameters_info to obtain details on the algorithm
  # parameters. Use set_parameters to set values for this parameters.
- #
- # * :disable_bias => If true, the alforithm will not use bias nodes.
+ #
+ # * :disable_bias => If true, the algorithm will not use bias nodes.
  # False by default.
- # * :initial_weight_function => f(n, i, j) must return the initial
- # weight for the conection between the node i in layer n, and node j in
+ # * :initial_weight_function => f(n, i, j) must return the initial
+ # weight for the conection between the node i in layer n, and node j in
  # layer n+1. By default a random number in [-1, 1) range.
- # * :propagation_function => By default:
+ # * :propagation_function => By default:
  # lambda { |x| 1/(1+Math.exp(-1*(x))) }
- # * :derivative_propagation_function => Derivative of the propagation
- # function, based on propagation function output.
+ # * :derivative_propagation_function => Derivative of the propagation
+ # function, based on propagation function output.
  # By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
- # * :learning_rate => By default 0.25
+ # * :activation => Built-in activation name (:sigmoid, :tanh or :relu).
+ # Selecting this overrides propagation_function and derivative_propagation_function.
+ # Default: :sigmoid
+ # * :learning_rate => By default 0.25
  # * :momentum => By default 0.1. Set this parameter to 0 to disable
  # momentum
- #
+ #
  # = How to use it
- #
+ #
  # # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
  # # and 2 outputs
- # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+ # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
  #
- # # Train the network
+ # # Train the network
  # 1000.times do |i|
  # net.train(example[i], result[i])
  # end
- #
+ #
  # # Use it: Evaluate data with the trained network
- # net.eval([12, 48, 12, 25])
- # => [0.86, 0.01]
- #
+ # net.eval([12, 48, 12, 25])
+ # => [0.86, 0.01]
+ #
  # More about multilayer perceptron neural networks and backpropagation:
- #
+ #
  # * http://en.wikipedia.org/wiki/Backpropagation
  # * http://en.wikipedia.org/wiki/Multilayer_perceptron
- #
+ #
  # = About the project
  # Author:: Sergio Fierens
  # License:: MPL 1.1
- # Url:: http://ai4r.org
+ # Url:: https://github.com/SergioFierens/ai4r
  class Backpropagation
-
  include Ai4r::Data::Parameterizable
-
- parameters_info :disable_bias => "If true, the alforithm will not use "+
- "bias nodes. False by default.",
- :initial_weight_function => "f(n, i, j) must return the initial "+
- "weight for the conection between the node i in layer n, and "+
- "node j in layer n+1. By default a random number in [-1, 1) range.",
- :propagation_function => "By default: " +
- "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
- :derivative_propagation_function => "Derivative of the propagation "+
- "function, based on propagation function output. By default: " +
- "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
- :learning_rate => "By default 0.25",
- :momentum => "By default 0.1. Set this parameter to 0 to disable "+
- "momentum."
-
+
  attr_accessor :structure, :weights, :activation_nodes, :last_changes
-
+
+ # When the activation parameter changes, update internal lambdas for each
+ # layer. Accepts a single symbol or an array of symbols (one for each
+ # layer except the input layer).
+ # @param symbols [Object]
+ # @return [Object]
+ def activation=(symbols)
+ symbols = [symbols] unless symbols.is_a?(Array)
+ layer_count = @structure.length - 1
+ if symbols.length == 1
+ symbols = Array.new(layer_count, symbols.first)
+ elsif symbols.length != layer_count
+ raise ArgumentError, "Activation array size must match number of layers (#{layer_count})"
+ end
+ @activation = symbols
+ @propagation_functions = @activation.map do |a|
+ Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[a] ||
+ Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[:sigmoid]
+ end
+ @derivative_functions = @activation.map do |a|
+ Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[a] ||
+ Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[:sigmoid]
+ end
+ end
+
+ # @return [Object]
+ def activation
+ if @activation.is_a?(Array)
+ if @set_by_loss || (@loss_function == :cross_entropy && @activation_overridden)
+ @activation.first
+ else
+ @activation
+ end
+ else
+ @activation
+ end
+ end
+
+ # @param symbol [Object]
+ # @return [Object]
+ def weight_init=(symbol)
+ @weight_init = symbol
+ @initial_weight_function = case symbol
+ when :xavier
+ Ai4r::NeuralNetwork::WeightInitializations.xavier(@structure)
+ when :he
+ Ai4r::NeuralNetwork::WeightInitializations.he(@structure)
+ else
+ Ai4r::NeuralNetwork::WeightInitializations.uniform
+ end
+ end
+
+ # @param symbol [Object]
+ # @return [Object]
+ def loss_function=(symbol)
+ @loss_function = symbol
+ return unless symbol == :cross_entropy && !@activation_overridden && !@custom_propagation
+
+ @set_by_loss = true
+ self.activation = :softmax
+ @activation_overridden = false
+ end
+
  # Creates a new network specifying the its architecture.
  # E.g.
- #
+ #
  # net = Backpropagation.new([4, 3, 2]) # 4 inputs
- # # 1 hidden layer with 3 neurons,
- # # 2 outputs
+ # # 1 hidden layer with 3 neurons,
+ # # 2 outputs
  # net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
- # # 2 hidden layer with 3 neurons each,
- # # 4 outputs
+ # # 2 hidden layer with 3 neurons each,
+ # # 4 outputs
  # net = Backpropagation.new([2, 1]) # 2 inputs
  # # No hidden layer
- # # 1 output
- def initialize(network_structure)
+ # # 1 output
+ # @param network_structure [Object]
+ # @param activation [Object]
+ # @param weight_init [Object]
+ # @return [Object]
+ def initialize(network_structure, activation = :sigmoid, weight_init = :uniform)
  @structure = network_structure
- @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
- @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
- @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
+ self.weight_init = weight_init
+ @custom_propagation = false
+ @set_by_loss = true
+ self.activation = activation
+ @activation_overridden = (activation != :sigmoid)
+ @set_by_loss = false
  @disable_bias = false
  @learning_rate = 0.25
  @momentum = 0.1
+ @loss_function = :mse
  end

- # Evaluates the input.
- # E.g.
- # net = Backpropagation.new([4, 3, 2])
  # net.eval([25, 32.3, 12.8, 1.5])
  # # => [0.83, 0.03]
+ # @param input_values [Object]
+ # @return [Object]
  def eval(input_values)
  check_input_dimension(input_values.length)
- init_network if !@weights
+ init_network unless @weights
  feedforward(input_values)
- return @activation_nodes.last.clone
+ @activation_nodes.last.clone
+ end
+
+ # Evaluates the input and returns most active node
+ # E.g.
+ # net = Backpropagation.new([4, 3, 2])
+ # net.eval_result([25, 32.3, 12.8, 1.5])
+ # # eval gives [0.83, 0.03]
+ # # => 0
+ # @param input_values [Object]
+ # @return [Object]
+ def eval_result(input_values)
+ result = eval(input_values)
+ result.index(result.max)
  end
-
+
  # This method trains the network using the backpropagation algorithm.
- #
+ #
  # input: Networks input
- #
+ #
  # output: Expected output for the given input.
  #
- # This method returns the network error:
- # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+ # This method returns the training loss according to +loss_function+.
+ # @param inputs [Object]
+ # @param outputs [Object]
+ # @return [Object]
  def train(inputs, outputs)
  eval(inputs)
  backpropagate(outputs)
- calculate_error(outputs)
+ calculate_loss(outputs, @activation_nodes.last)
+ end
+
+ # Train a list of input/output pairs and return average loss.
+ # @param batch_inputs [Object]
+ # @param batch_outputs [Object]
+ # @return [Object]
+ def train_batch(batch_inputs, batch_outputs)
+ if batch_inputs.length != batch_outputs.length
+ raise ArgumentError,
+ 'Inputs and outputs size mismatch'
+ end
+
+ batch_size = batch_inputs.length
+ init_network unless @weights
+
+ accumulated_changes = Array.new(@weights.length) do |w|
+ Array.new(@weights[w].length) do |i|
+ Array.new(@weights[w][i].length, 0.0)
+ end
+ end
+
+ sum_error = 0.0
+ batch_inputs.each_index do |idx|
+ inputs = batch_inputs[idx]
+ outputs = batch_outputs[idx]
+ eval(inputs)
+ calculate_output_deltas(outputs)
+ calculate_internal_deltas
+
+ (@weights.length - 1).downto(0) do |n|
+ @weights[n].each_index do |i|
+ @weights[n][i].each_index do |j|
+ change = @deltas[n][j] * @activation_nodes[n][i]
+ accumulated_changes[n][i][j] += change
+ end
+ end
+ end
+
+ sum_error += calculate_loss(outputs, @activation_nodes.last)
+ end
+
+ (@weights.length - 1).downto(0) do |n|
+ @weights[n].each_index do |i|
+ @weights[n][i].each_index do |j|
+ avg_change = accumulated_changes[n][i][j] / batch_size.to_f
+ @weights[n][i][j] += (learning_rate * avg_change) + (momentum * @last_changes[n][i][j])
+ @last_changes[n][i][j] = avg_change
+ end
+ end
+ end
+
+ sum_error / batch_size.to_f
  end
-
- # Initialize (or reset) activation nodes and weights, with the
+
+ # Train for a number of epochs over the dataset. Optionally define a batch size.
+ # Data can be shuffled between epochs passing +shuffle: true+ (default).
+ # Use +random_seed+ to make shuffling deterministic.
+ # Returns an array with the average loss of each epoch.
+ # @return [Object]
+ def train_epochs(data_inputs, data_outputs, epochs:, batch_size: 1,
+ early_stopping_patience: nil, min_delta: 0.0,
+ shuffle: true, random_seed: nil, &block)
+ if data_inputs.length != data_outputs.length
+ raise ArgumentError,
+ 'Inputs and outputs size mismatch'
+ end
+
+ losses = []
+ best_loss = Float::INFINITY
+ patience = early_stopping_patience
+ patience_counter = 0
+ rng = random_seed.nil? ? Random.new : Random.new(random_seed)
+ epochs.times do |epoch|
+ epoch_error = 0.0
+ epoch_inputs = data_inputs
+ epoch_outputs = data_outputs
+ if shuffle
+ indices = (0...data_inputs.length).to_a.shuffle(random: rng)
+ epoch_inputs = data_inputs.values_at(*indices)
+ epoch_outputs = data_outputs.values_at(*indices)
+ end
+ index = 0
+ while index < epoch_inputs.length
+ batch_in = epoch_inputs[index, batch_size]
+ batch_out = epoch_outputs[index, batch_size]
+ batch_error = train_batch(batch_in, batch_out)
+ epoch_error += batch_error * batch_in.length
+ index += batch_size
+ end
+ epoch_loss = epoch_error / data_inputs.length.to_f
+ losses << epoch_loss
+ if block
+ if block.arity >= 3
+ correct = 0
+ data_inputs.each_index do |i|
+ output = eval(data_inputs[i])
+ predicted = output.index(output.max)
+ expected = data_outputs[i].index(data_outputs[i].max)
+ correct += 1 if predicted == expected
+ end
+ accuracy = correct.to_f / data_inputs.length
+ block.call(epoch, epoch_loss, accuracy)
+ else
+ block.call(epoch, epoch_loss)
+ end
+ end
+ if patience
+ if best_loss - epoch_loss > min_delta
+ best_loss = epoch_loss
+ patience_counter = 0
+ else
+ patience_counter += 1
+ break if patience_counter >= patience
+ end
+ end
+ end
+ losses
+ end
+
+ # Initialize (or reset) activation nodes and weights, with the
  # provided net structure and parameters.
+ # @return [Object]
  def init_network
  init_activation_nodes
  init_weights
  init_last_changes
- return self
+ self
  end

  protected
@@ -168,6 +360,7 @@ module Ai4r
  # * propagation_function
  # * derivative_propagation_function
  # you must restore their values manually after loading the instance.
+ # @return [Object]
  def marshal_dump
  [
  @structure,
@@ -176,151 +369,234 @@ module Ai4r
  @momentum,
  @weights,
  @last_changes,
- @activation_nodes
+ @activation_nodes,
+ @activation
  ]
- end
-
- def marshal_load(ary)
- @structure,
- @disable_bias,
- @learning_rate,
- @momentum,
- @weights,
- @last_changes,
- @activation_nodes = ary
- @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
- @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
- @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
- end
+ end

+ # @param ary [Object]
+ # @return [Object]
+ def marshal_load(ary)
+ @structure,
+ @disable_bias,
+ @learning_rate,
+ @momentum,
+ @weights,
+ @last_changes,
+ @activation_nodes,
+ @activation = ary
+ self.weight_init = :uniform
+ self.activation = @activation || :sigmoid
+ end

  # Propagate error backwards
+ # @param expected_output_values [Object]
+ # @return [Object]
  def backpropagate(expected_output_values)
  check_output_dimension(expected_output_values.length)
  calculate_output_deltas(expected_output_values)
  calculate_internal_deltas
  update_weights
  end
-
+
  # Propagate values forward
+ # @param input_values [Object]
+ # @return [Object]
  def feedforward(input_values)
- input_values.each_index do |input_index|
+ input_values.each_index do |input_index|
  @activation_nodes.first[input_index] = input_values[input_index]
  end
  @weights.each_index do |n|
- @structure[n+1].times do |j|
- sum = 0.0
+ sums = Array.new(@structure[n + 1], 0.0)
+ @structure[n + 1].times do |j|
  @activation_nodes[n].each_index do |i|
- sum += (@activation_nodes[n][i] * @weights[n][i][j])
+ sums[j] += (@activation_nodes[n][i] * @weights[n][i][j])
+ end
+ end
+ if @activation[n] == :softmax
+ values = @propagation_functions[n].call(sums)
+ values.each_index { |j| @activation_nodes[n + 1][j] = values[j] }
+ else
+ sums.each_index do |j|
+ @activation_nodes[n + 1][j] = @propagation_functions[n].call(sums[j])
  end
- @activation_nodes[n+1][j] = @propagation_function.call(sum)
  end
- end
+ end
  end
-
+
  # Initialize neurons structure.
+ # @return [Object]
  def init_activation_nodes
- @activation_nodes = Array.new(@structure.length) do |n|
+ @activation_nodes = Array.new(@structure.length) do |n|
  Array.new(@structure[n], 1.0)
  end
- if not disable_bias
- @activation_nodes[0...-1].each {|layer| layer << 1.0 }
- end
+ return if disable_bias
+
+ @activation_nodes[0...-1].each { |layer| layer << 1.0 }
  end
-
+
  # Initialize the weight arrays using function specified with the
  # initial_weight_function parameter
+ # @return [Object]
  def init_weights
- @weights = Array.new(@structure.length-1) do |i|
+ @weights = Array.new(@structure.length - 1) do |i|
  nodes_origin = @activation_nodes[i].length
- nodes_target = @structure[i+1]
+ nodes_target = @structure[i + 1]
  Array.new(nodes_origin) do |j|
- Array.new(nodes_target) do |k|
+ Array.new(nodes_target) do |k|
  @initial_weight_function.call(i, j, k)
  end
  end
  end
- end
+ end

- # Momentum usage need to know how much a weight changed in the
- # previous training. This method initialize the @last_changes
+ # Momentum usage need to know how much a weight changed in the
+ # previous training. This method initialize the @last_changes
  # structure with 0 values.
+ # @return [Object]
  def init_last_changes
  @last_changes = Array.new(@weights.length) do |w|
- Array.new(@weights[w].length) do |i|
+ Array.new(@weights[w].length) do |i|
  Array.new(@weights[w][i].length, 0.0)
  end
  end
  end
-
+
  # Calculate deltas for output layer
+ # @param expected_values [Object]
+ # @return [Object]
  def calculate_output_deltas(expected_values)
  output_values = @activation_nodes.last
  output_deltas = []
+ func = @derivative_functions.last
  output_values.each_index do |output_index|
- error = expected_values[output_index] - output_values[output_index]
- output_deltas << @derivative_propagation_function.call(
- output_values[output_index]) * error
+ if @loss_function == :cross_entropy && @activation == :softmax
+ output_deltas << (output_values[output_index] - expected_values[output_index])
+ else
+ error = expected_values[output_index] - output_values[output_index]
+ output_deltas << (func.call(output_values[output_index]) * error)
+ end
  end
  @deltas = [output_deltas]
  end
-
+
  # Calculate deltas for hidden layers
+ # @return [Object]
  def calculate_internal_deltas
  prev_deltas = @deltas.last
- (@activation_nodes.length-2).downto(1) do |layer_index|
+ (@activation_nodes.length - 2).downto(1) do |layer_index|
  layer_deltas = []
  @activation_nodes[layer_index].each_index do |j|
  error = 0.0
- @structure[layer_index+1].times do |k|
+ @structure[layer_index + 1].times do |k|
  error += prev_deltas[k] * @weights[layer_index][j][k]
  end
- layer_deltas[j] = (@derivative_propagation_function.call(
- @activation_nodes[layer_index][j]) * error)
+ func = @derivative_functions[layer_index - 1]
+ layer_deltas[j] = func.call(@activation_nodes[layer_index][j]) * error
  end
  prev_deltas = layer_deltas
  @deltas.unshift(layer_deltas)
  end
  end
-
+
  # Update weights after @deltas have been calculated.
+ # @return [Object]
  def update_weights
- (@weights.length-1).downto(0) do |n|
- @weights[n].each_index do |i|
- @weights[n][i].each_index do |j|
- change = @deltas[n][j]*@activation_nodes[n][i]
- @weights[n][i][j] += ( learning_rate * change +
- momentum * @last_changes[n][i][j])
+ (@weights.length - 1).downto(0) do |n|
+ @weights[n].each_index do |i|
+ @weights[n][i].each_index do |j|
+ change = @deltas[n][j] * @activation_nodes[n][i]
+ @weights[n][i][j] += ((learning_rate * change) +
+ (momentum * @last_changes[n][i][j]))
  @last_changes[n][i][j] = change
  end
  end
  end
  end
-
- # Calculate quadratic error for a expected output value
+
+ # Calculate quadratic error for an expected output value
  # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+ # @param expected_output [Object]
+ # @return [Object]
  def calculate_error(expected_output)
  output_values = @activation_nodes.last
  error = 0.0
  expected_output.each_index do |output_index|
- error +=
- 0.5*(output_values[output_index]-expected_output[output_index])**2
+ error +=
+ 0.5 * ((output_values[output_index] - expected_output[output_index])**2)
  end
- return error
+ error
  end
-
+
+ # Calculate loss for expected/actual vectors according to selected
+ # loss_function (:mse or :cross_entropy).
+ # @param expected [Object]
+ # @param actual [Object]
+ # @return [Object]
+ def calculate_loss(expected, actual)
+ case @loss_function
+ when :cross_entropy
+ epsilon = 1e-12
+ loss = 0.0
+ if @activation == :softmax
+ expected.each_index do |i|
+ p = [[actual[i], epsilon].max, 1 - epsilon].min
+ loss -= expected[i] * Math.log(p)
+ end
+ else
+ expected.each_index do |i|
+ p = [[actual[i], epsilon].max, 1 - epsilon].min
+ loss -= (expected[i] * Math.log(p)) + ((1 - expected[i]) * Math.log(1 - p))
+ end
+ end
+ loss
+ else
+ # Mean squared error
+ error = 0.0
+ expected.each_index do |i|
+ error += 0.5 * ((expected[i] - actual[i])**2)
+ end
+ error
+ end
+ end
+
+ # @param inputs [Object]
+ # @return [Object]
  def check_input_dimension(inputs)
- raise ArgumentError, "Wrong number of inputs. " +
- "Expected: #{@structure.first}, " +
- "received: #{inputs}." if inputs!=@structure.first
+ return unless inputs != @structure.first
+
+ raise ArgumentError, 'Wrong number of inputs. ' \
+ "Expected: #{@structure.first}, " \
+ "received: #{inputs}."
  end

+ # @param outputs [Object]
+ # @return [Object]
  def check_output_dimension(outputs)
- raise ArgumentError, "Wrong number of outputs. " +
- "Expected: #{@structure.last}, " +
- "received: #{outputs}." if outputs!=@structure.last
+ return unless outputs != @structure.last
+
+ raise ArgumentError, 'Wrong number of outputs. ' \
+ "Expected: #{@structure.last}, " \
+ "received: #{outputs}."
  end
-
+
+ parameters_info disable_bias: 'If true, the algorithm will not use ' \
+ 'bias nodes. False by default.',
+ initial_weight_function: 'f(n, i, j) must return the initial ' \
+ 'weight for the conection between the node i in layer n, and ' \
+ 'node j in layer n+1. By default a random number in [-1, 1) range.',
+ weight_init: 'Built-in weight initialization strategy (:uniform, :xavier or :he). Default: :uniform',
+ propagation_function: 'By default: ' \
+ 'lambda { |x| 1/(1+Math.exp(-1*(x))) }',
+ derivative_propagation_function: 'Derivative of the propagation ' \
+ 'function, based on propagation function output. By default: ' \
+ 'lambda { |y| y*(1-y) }, where y=propagation_function(x)',
+ activation: 'Activation function per layer. Provide a symbol or an array of symbols (:sigmoid, :tanh, :relu or :softmax). Default: :sigmoid',
+ learning_rate: 'By default 0.25',
+ momentum: 'By default 0.1. Set this parameter to 0 to disable ' \
+ 'momentum.',
+ loss_function: 'Loss function used when training (:mse or ' \
+ ':cross_entropy). Default: :mse'
  end
  end
  end
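
The first hunk above replaces the fixed sigmoid lambdas with named activation, weight-initialization and loss-function options. A minimal sketch of how those options combine, assuming the released 2.0 gem; the layer sizes and input vectors below are made up for illustration:

  require 'ai4r'

  # Per-layer activations (:relu for the hidden layer, :sigmoid for the output)
  # plus He weight initialization, via the extended constructor signature.
  net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2], [:relu, :sigmoid], :he)

  # With the default :sigmoid activation left in place, selecting :cross_entropy
  # switches the output layer to :softmax, as loss_function= does above.
  clf = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
  clf.loss_function = :cross_entropy
  clf.train([0.9, 0.1, 0.3, 0.7], [1, 0])   # returns the cross-entropy loss
  clf.eval_result([0.9, 0.1, 0.3, 0.7])     # index of the most active output node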
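
train_batch, added in the same hunk, accumulates the per-example weight changes and applies one averaged update, so each call is a single mini-batch step. An illustrative call with made-up XOR-style data:

  net = Ai4r::NeuralNetwork::Backpropagation.new([2, 2, 1])

  batch_inputs  = [[0, 0], [0, 1], [1, 0], [1, 1]]
  batch_outputs = [[0], [1], [1], [0]]

  # One averaged weight update over the whole batch; returns the mean loss.
  loss = net.train_batch(batch_inputs, batch_outputs)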
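
train_epochs wraps train_batch in an epoch loop with optional shuffling, deterministic seeding and early stopping; a block receives (epoch, loss), or (epoch, loss, accuracy) when it takes three arguments, where accuracy compares the argmax of the network output against the argmax of the target. A sketch with made-up one-hot data:

  net = Ai4r::NeuralNetwork::Backpropagation.new([2, 3, 2])

  inputs  = [[0, 0], [0, 1], [1, 0], [1, 1]]
  targets = [[1, 0], [0, 1], [0, 1], [1, 0]]   # one-hot, so accuracy can be derived

  losses = net.train_epochs(inputs, targets,
                            epochs: 200, batch_size: 2,
                            shuffle: true, random_seed: 42,
                            early_stopping_patience: 10, min_delta: 1.0e-4) do |epoch, loss, acc|
    puts "epoch #{epoch}: loss=#{loss.round(4)} acc=#{acc.round(2)}" if (epoch % 50).zero?
  end
  # => one average loss per completed epoch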
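
The parameters_info block, now at the bottom of the class, also documents weight_init, activation and loss_function, and the class comment still points at Parameterizable's get_parameters_info / set_parameters. An illustrative configuration call (the values are arbitrary):

  Ai4r::NeuralNetwork::Backpropagation.get_parameters_info   # descriptions of each parameter

  net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
  net.set_parameters(learning_rate: 0.1, momentum: 0.0, disable_bias: true)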
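
The marshal_dump/marshal_load changes in the later hunks round-trip the activation symbols, while marshal_load resets weight_init to :uniform; the comment kept from 1.12 still warns that custom initial_weight_function, propagation_function and derivative_propagation_function lambdas are not serialized and must be restored by hand. A rough sketch:

  trained = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
  # ... training ...

  blob     = Marshal.dump(trained)   # structure, weights and activation symbols survive
  restored = Marshal.load(blob)      # weight_init falls back to :uniform on load
  # Reassign any custom lambdas here, e.g. restored.set_parameters(propagation_function: ...)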