ai4r 1.13 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +174 -0
- data/examples/classifiers/hyperpipes_data.csv +14 -0
- data/examples/classifiers/hyperpipes_example.rb +22 -0
- data/examples/classifiers/ib1_example.rb +12 -0
- data/examples/classifiers/id3_example.rb +15 -10
- data/examples/classifiers/id3_graphviz_example.rb +17 -0
- data/examples/classifiers/logistic_regression_example.rb +11 -0
- data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
- data/examples/classifiers/naive_bayes_example.rb +12 -13
- data/examples/classifiers/one_r_example.rb +27 -0
- data/examples/classifiers/parameter_tutorial.rb +29 -0
- data/examples/classifiers/prism_nominal_example.rb +15 -0
- data/examples/classifiers/prism_numeric_example.rb +21 -0
- data/examples/classifiers/simple_linear_regression_example.rb +14 -11
- data/examples/classifiers/zero_and_one_r_example.rb +34 -0
- data/examples/classifiers/zero_one_r_data.csv +8 -0
- data/examples/clusterers/clusterer_example.rb +40 -34
- data/examples/clusterers/dbscan_example.rb +17 -0
- data/examples/clusterers/dendrogram_example.rb +17 -0
- data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
- data/examples/clusterers/kmeans_custom_example.rb +26 -0
- data/examples/genetic_algorithm/bitstring_example.rb +41 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
- data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
- data/examples/neural_network/backpropagation_example.rb +48 -48
- data/examples/neural_network/hopfield_example.rb +45 -0
- data/examples/neural_network/patterns_with_base_noise.rb +39 -39
- data/examples/neural_network/patterns_with_noise.rb +41 -39
- data/examples/neural_network/train_epochs_callback.rb +25 -0
- data/examples/neural_network/training_patterns.rb +39 -39
- data/examples/neural_network/transformer_text_classification.rb +78 -0
- data/examples/neural_network/xor_example.rb +23 -22
- data/examples/reinforcement/q_learning_example.rb +10 -0
- data/examples/som/som_data.rb +155 -152
- data/examples/som/som_multi_node_example.rb +12 -13
- data/examples/som/som_single_example.rb +12 -15
- data/examples/transformer/decode_classifier_example.rb +68 -0
- data/examples/transformer/deterministic_example.rb +10 -0
- data/examples/transformer/seq2seq_example.rb +16 -0
- data/lib/ai4r/classifiers/classifier.rb +24 -16
- data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
- data/lib/ai4r/classifiers/ib1.rb +122 -32
- data/lib/ai4r/classifiers/id3.rb +524 -145
- data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
- data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
- data/lib/ai4r/classifiers/one_r.rb +112 -44
- data/lib/ai4r/classifiers/prism.rb +167 -76
- data/lib/ai4r/classifiers/random_forest.rb +72 -0
- data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
- data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
- data/lib/ai4r/classifiers/votes.rb +57 -0
- data/lib/ai4r/classifiers/zero_r.rb +71 -30
- data/lib/ai4r/clusterers/average_linkage.rb +46 -27
- data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
- data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
- data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
- data/lib/ai4r/clusterers/clusterer.rb +29 -14
- data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
- data/lib/ai4r/clusterers/dbscan.rb +134 -0
- data/lib/ai4r/clusterers/diana.rb +75 -49
- data/lib/ai4r/clusterers/k_means.rb +270 -135
- data/lib/ai4r/clusterers/median_linkage.rb +49 -33
- data/lib/ai4r/clusterers/single_linkage.rb +196 -88
- data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
- data/lib/ai4r/data/data_set.rb +223 -103
- data/lib/ai4r/data/parameterizable.rb +31 -25
- data/lib/ai4r/data/proximity.rb +62 -62
- data/lib/ai4r/data/statistics.rb +46 -35
- data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
- data/lib/ai4r/experiment/split.rb +39 -0
- data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
- data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
- data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
- data/lib/ai4r/neural_network/activation_functions.rb +37 -0
- data/lib/ai4r/neural_network/backpropagation.rb +399 -134
- data/lib/ai4r/neural_network/hopfield.rb +175 -58
- data/lib/ai4r/neural_network/transformer.rb +194 -0
- data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
- data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
- data/lib/ai4r/reinforcement/q_learning.rb +51 -0
- data/lib/ai4r/search/a_star.rb +76 -0
- data/lib/ai4r/search/bfs.rb +50 -0
- data/lib/ai4r/search/dfs.rb +50 -0
- data/lib/ai4r/search/mcts.rb +118 -0
- data/lib/ai4r/search.rb +12 -0
- data/lib/ai4r/som/distance_metrics.rb +29 -0
- data/lib/ai4r/som/layer.rb +28 -17
- data/lib/ai4r/som/node.rb +61 -32
- data/lib/ai4r/som/som.rb +158 -41
- data/lib/ai4r/som/two_phase_layer.rb +21 -25
- data/lib/ai4r/version.rb +3 -0
- data/lib/ai4r.rb +57 -28
- metadata +79 -109
- data/README.rdoc +0 -39
- data/test/classifiers/hyperpipes_test.rb +0 -84
- data/test/classifiers/ib1_test.rb +0 -78
- data/test/classifiers/id3_test.rb +0 -220
- data/test/classifiers/multilayer_perceptron_test.rb +0 -79
- data/test/classifiers/naive_bayes_test.rb +0 -43
- data/test/classifiers/one_r_test.rb +0 -62
- data/test/classifiers/prism_test.rb +0 -85
- data/test/classifiers/simple_linear_regression_test.rb +0 -37
- data/test/classifiers/zero_r_test.rb +0 -50
- data/test/clusterers/average_linkage_test.rb +0 -51
- data/test/clusterers/bisecting_k_means_test.rb +0 -66
- data/test/clusterers/centroid_linkage_test.rb +0 -53
- data/test/clusterers/complete_linkage_test.rb +0 -57
- data/test/clusterers/diana_test.rb +0 -69
- data/test/clusterers/k_means_test.rb +0 -167
- data/test/clusterers/median_linkage_test.rb +0 -53
- data/test/clusterers/single_linkage_test.rb +0 -122
- data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
- data/test/clusterers/ward_linkage_test.rb +0 -53
- data/test/clusterers/weighted_average_linkage_test.rb +0 -53
- data/test/data/data_set_test.rb +0 -104
- data/test/data/proximity_test.rb +0 -87
- data/test/data/statistics_test.rb +0 -65
- data/test/experiment/classifier_evaluator_test.rb +0 -76
- data/test/genetic_algorithm/chromosome_test.rb +0 -57
- data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
- data/test/neural_network/backpropagation_test.rb +0 -82
- data/test/neural_network/hopfield_test.rb +0 -72
- data/test/som/som_test.rb +0 -97
data/lib/ai4r/neural_network/backpropagation.rb

@@ -1,173 +1,354 @@
+# frozen_string_literal: true
+
 # Author:: Sergio Fierens
 # License:: MPL 1.1
 # Project:: ai4r
-# Url::
+# Url:: https://github.com/SergioFierens/ai4r
 #
-# You can redistribute it and/or modify it under the terms of
-# the Mozilla Public License version 1.1 as published by the
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1 as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
 
-require File.dirname(__FILE__) + '/../data/parameterizable'
+require_relative '../data/parameterizable'
+require_relative 'activation_functions'
+require_relative 'weight_initializations'
 
 module Ai4r
-
-  # Artificial Neural Networks are mathematical or computational models based on
-  # biological neural networks.
-  #
+  # Artificial Neural Networks are mathematical or computational models based on
+  # biological neural networks.
+  #
   # More about neural networks:
-  #
+  #
   # * http://en.wikipedia.org/wiki/Artificial_neural_network
   #
   module NeuralNetwork
-
     # = Introduction
-    #
+    #
     # This is an implementation of a multilayer perceptron network, using
     # the backpropagation algorithm for learning.
-    #
-    # Backpropagation is a supervised learning technique (described
-    # by Paul Werbos in 1974, and further developed by David E.
+    #
+    # Backpropagation is a supervised learning technique (described
+    # by Paul Werbos in 1974, and further developed by David E.
     # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
-    #
+    #
     # = Features
-    #
+    #
     # * Support for any network architecture (number of layers and neurons)
     # * Configurable propagation function
-    # * Optional usage of bias
+    # * Optional usage of bias
    # * Configurable momentum
     # * Configurable learning rate
     # * Configurable initial weight function
     # * 100% ruby code, no external dependency
-    #
+    #
     # = Parameters
-    #
+    #
     # Use class method get_parameters_info to obtain details on the algorithm
     # parameters. Use set_parameters to set values for this parameters.
-    #
+    #
     # * :disable_bias => If true, the algorithm will not use bias nodes.
     #   False by default.
-    # * :initial_weight_function => f(n, i, j) must return the initial
-    #   weight for the conection between the node i in layer n, and node j in
+    # * :initial_weight_function => f(n, i, j) must return the initial
+    #   weight for the conection between the node i in layer n, and node j in
     #   layer n+1. By default a random number in [-1, 1) range.
-    # * :propagation_function => By default:
+    # * :propagation_function => By default:
     #   lambda { |x| 1/(1+Math.exp(-1*(x))) }
-    # * :derivative_propagation_function => Derivative of the propagation
-    #   function, based on propagation function output.
+    # * :derivative_propagation_function => Derivative of the propagation
+    #   function, based on propagation function output.
     #   By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
-    # * :learning_rate => By default 0.25
+    # * :activation => Built-in activation name (:sigmoid, :tanh or :relu).
+    #   Selecting this overrides propagation_function and derivative_propagation_function.
+    #   Default: :sigmoid
+    # * :learning_rate => By default 0.25
     # * :momentum => By default 0.1. Set this parameter to 0 to disable
     #   momentum
-    #
+    #
     # = How to use it
-    #
+    #
     #   # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
     #   # and 2 outputs
-    #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+    #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
     #
-    #   # Train the network
+    #   # Train the network
     #   1000.times do |i|
     #     net.train(example[i], result[i])
     #   end
-    #
+    #
     #   # Use it: Evaluate data with the trained network
-    #   net.eval([12, 48, 12, 25])
-    #     => [0.86, 0.01]
-    #
+    #   net.eval([12, 48, 12, 25])
+    #     => [0.86, 0.01]
+    #
     # More about multilayer perceptron neural networks and backpropagation:
-    #
+    #
     # * http://en.wikipedia.org/wiki/Backpropagation
     # * http://en.wikipedia.org/wiki/Multilayer_perceptron
-    #
+    #
     # = About the project
     # Author:: Sergio Fierens
     # License:: MPL 1.1
-    # Url::
+    # Url:: https://github.com/SergioFierens/ai4r
     class Backpropagation
-
       include Ai4r::Data::Parameterizable
-
-      parameters_info :disable_bias => "If true, the algorithm will not use "+
-          "bias nodes. False by default.",
-        :initial_weight_function => "f(n, i, j) must return the initial "+
-          "weight for the conection between the node i in layer n, and "+
-          "node j in layer n+1. By default a random number in [-1, 1) range.",
-        :propagation_function => "By default: " +
-          "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
-        :derivative_propagation_function => "Derivative of the propagation "+
-          "function, based on propagation function output. By default: " +
-          "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
-        :learning_rate => "By default 0.25",
-        :momentum => "By default 0.1. Set this parameter to 0 to disable "+
-          "momentum."
-
+
       attr_accessor :structure, :weights, :activation_nodes, :last_changes
-
+
+      # When the activation parameter changes, update internal lambdas for each
+      # layer. Accepts a single symbol or an array of symbols (one for each
+      # layer except the input layer).
+      # @param symbols [Object]
+      # @return [Object]
+      def activation=(symbols)
+        symbols = [symbols] unless symbols.is_a?(Array)
+        layer_count = @structure.length - 1
+        if symbols.length == 1
+          symbols = Array.new(layer_count, symbols.first)
+        elsif symbols.length != layer_count
+          raise ArgumentError, "Activation array size must match number of layers (#{layer_count})"
+        end
+        @activation = symbols
+        @propagation_functions = @activation.map do |a|
+          Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[a] ||
+            Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[:sigmoid]
+        end
+        @derivative_functions = @activation.map do |a|
+          Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[a] ||
+            Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[:sigmoid]
+        end
+      end
+
+      # @return [Object]
+      def activation
+        if @activation.is_a?(Array)
+          if @set_by_loss || (@loss_function == :cross_entropy && @activation_overridden)
+            @activation.first
+          else
+            @activation
+          end
+        else
+          @activation
+        end
+      end
+
+      # @param symbol [Object]
+      # @return [Object]
+      def weight_init=(symbol)
+        @weight_init = symbol
+        @initial_weight_function = case symbol
+                                   when :xavier
+                                     Ai4r::NeuralNetwork::WeightInitializations.xavier(@structure)
+                                   when :he
+                                     Ai4r::NeuralNetwork::WeightInitializations.he(@structure)
+                                   else
+                                     Ai4r::NeuralNetwork::WeightInitializations.uniform
+                                   end
+      end
+
+      # @param symbol [Object]
+      # @return [Object]
+      def loss_function=(symbol)
+        @loss_function = symbol
+        return unless symbol == :cross_entropy && !@activation_overridden && !@custom_propagation
+
+        @set_by_loss = true
+        self.activation = :softmax
+        @activation_overridden = false
+      end
+
       # Creates a new network specifying the its architecture.
       # E.g.
-      #
+      #
       #   net = Backpropagation.new([4, 3, 2])  # 4 inputs
-      #                                         # 1 hidden layer with 3 neurons,
-      #                                         # 2 outputs
+      #                                         # 1 hidden layer with 3 neurons,
+      #                                         # 2 outputs
       #   net = Backpropagation.new([2, 3, 3, 4])  # 2 inputs
-      #                                            # 2 hidden layer with 3 neurons each,
-      #                                            # 4 outputs
+      #                                            # 2 hidden layer with 3 neurons each,
+      #                                            # 4 outputs
       #   net = Backpropagation.new([2, 1])  # 2 inputs
       #                                      # No hidden layer
-      #                                      # 1 output
-      def initialize(network_structure)
+      #                                      # 1 output
+      # @param network_structure [Object]
+      # @param activation [Object]
+      # @param weight_init [Object]
+      # @return [Object]
+      def initialize(network_structure, activation = :sigmoid, weight_init = :uniform)
         @structure = network_structure
-        @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
-        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) }
-        @derivative_propagation_function = lambda { |y| y*(1-y) }
+        self.weight_init = weight_init
+        @custom_propagation = false
+        @set_by_loss = true
+        self.activation = activation
+        @activation_overridden = (activation != :sigmoid)
+        @set_by_loss = false
         @disable_bias = false
         @learning_rate = 0.25
         @momentum = 0.1
+        @loss_function = :mse
       end
 
-      # Evaluates the input.
-      # E.g.
-      #   net = Backpropagation.new([4, 3, 2])
       #   net.eval([25, 32.3, 12.8, 1.5])
       #   # => [0.83, 0.03]
+      # @param input_values [Object]
+      # @return [Object]
       def eval(input_values)
         check_input_dimension(input_values.length)
-        init_network if !@weights
+        init_network unless @weights
         feedforward(input_values)
-        return @activation_nodes.last.clone
+        @activation_nodes.last.clone
       end
-
+
       # Evaluates the input and returns most active node
       # E.g.
       #   net = Backpropagation.new([4, 3, 2])
       #   net.eval_result([25, 32.3, 12.8, 1.5])
       #   # eval gives [0.83, 0.03]
       #   # => 0
+      # @param input_values [Object]
+      # @return [Object]
       def eval_result(input_values)
         result = eval(input_values)
         result.index(result.max)
       end
-
+
       # This method trains the network using the backpropagation algorithm.
-      #
+      #
       # input: Networks input
-      #
+      #
       # output: Expected output for the given input.
       #
-      # This method returns the network error:
-      # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+      # This method returns the training loss according to +loss_function+.
+      # @param inputs [Object]
+      # @param outputs [Object]
+      # @return [Object]
       def train(inputs, outputs)
         eval(inputs)
         backpropagate(outputs)
-        calculate_error(outputs)
+        calculate_loss(outputs, @activation_nodes.last)
+      end
+
+      # Train a list of input/output pairs and return average loss.
+      # @param batch_inputs [Object]
+      # @param batch_outputs [Object]
+      # @return [Object]
+      def train_batch(batch_inputs, batch_outputs)
+        if batch_inputs.length != batch_outputs.length
+          raise ArgumentError,
+                'Inputs and outputs size mismatch'
+        end
+
+        batch_size = batch_inputs.length
+        init_network unless @weights
+
+        accumulated_changes = Array.new(@weights.length) do |w|
+          Array.new(@weights[w].length) do |i|
+            Array.new(@weights[w][i].length, 0.0)
+          end
+        end
+
+        sum_error = 0.0
+        batch_inputs.each_index do |idx|
+          inputs = batch_inputs[idx]
+          outputs = batch_outputs[idx]
+          eval(inputs)
+          calculate_output_deltas(outputs)
+          calculate_internal_deltas
+
+          (@weights.length - 1).downto(0) do |n|
+            @weights[n].each_index do |i|
+              @weights[n][i].each_index do |j|
+                change = @deltas[n][j] * @activation_nodes[n][i]
+                accumulated_changes[n][i][j] += change
+              end
+            end
+          end
+
+          sum_error += calculate_loss(outputs, @activation_nodes.last)
+        end
+
+        (@weights.length - 1).downto(0) do |n|
+          @weights[n].each_index do |i|
+            @weights[n][i].each_index do |j|
+              avg_change = accumulated_changes[n][i][j] / batch_size.to_f
+              @weights[n][i][j] += (learning_rate * avg_change) + (momentum * @last_changes[n][i][j])
+              @last_changes[n][i][j] = avg_change
+            end
+          end
+        end
+
+        sum_error / batch_size.to_f
+      end
+
+      # Train for a number of epochs over the dataset. Optionally define a batch size.
+      # Data can be shuffled between epochs passing +shuffle: true+ (default).
+      # Use +random_seed+ to make shuffling deterministic.
+      # Returns an array with the average loss of each epoch.
+      # @return [Object]
+      def train_epochs(data_inputs, data_outputs, epochs:, batch_size: 1,
+                       early_stopping_patience: nil, min_delta: 0.0,
+                       shuffle: true, random_seed: nil, &block)
+        if data_inputs.length != data_outputs.length
+          raise ArgumentError,
+                'Inputs and outputs size mismatch'
+        end
+
+        losses = []
+        best_loss = Float::INFINITY
+        patience = early_stopping_patience
+        patience_counter = 0
+        rng = random_seed.nil? ? Random.new : Random.new(random_seed)
+        epochs.times do |epoch|
+          epoch_error = 0.0
+          epoch_inputs = data_inputs
+          epoch_outputs = data_outputs
+          if shuffle
+            indices = (0...data_inputs.length).to_a.shuffle(random: rng)
+            epoch_inputs = data_inputs.values_at(*indices)
+            epoch_outputs = data_outputs.values_at(*indices)
+          end
+          index = 0
+          while index < epoch_inputs.length
+            batch_in = epoch_inputs[index, batch_size]
+            batch_out = epoch_outputs[index, batch_size]
+            batch_error = train_batch(batch_in, batch_out)
+            epoch_error += batch_error * batch_in.length
+            index += batch_size
+          end
+          epoch_loss = epoch_error / data_inputs.length.to_f
+          losses << epoch_loss
+          if block
+            if block.arity >= 3
+              correct = 0
+              data_inputs.each_index do |i|
+                output = eval(data_inputs[i])
+                predicted = output.index(output.max)
+                expected = data_outputs[i].index(data_outputs[i].max)
+                correct += 1 if predicted == expected
+              end
+              accuracy = correct.to_f / data_inputs.length
+              block.call(epoch, epoch_loss, accuracy)
+            else
+              block.call(epoch, epoch_loss)
+            end
+          end
+          if patience
+            if best_loss - epoch_loss > min_delta
+              best_loss = epoch_loss
+              patience_counter = 0
+            else
+              patience_counter += 1
+              break if patience_counter >= patience
+            end
+          end
+        end
+        losses
       end
-
-      # Initialize (or reset) activation nodes and weights, with the
+
+      # Initialize (or reset) activation nodes and weights, with the
       # provided net structure and parameters.
+      # @return [Object]
       def init_network
         init_activation_nodes
         init_weights
         init_last_changes
-        return self
+        self
       end
 
       protected
@@ -179,6 +360,7 @@ module Ai4r
       # * propagation_function
       # * derivative_propagation_function
       # you must restore their values manually after loading the instance.
+      # @return [Object]
       def marshal_dump
         [
           @structure,
@@ -187,10 +369,13 @@ module Ai4r
           @momentum,
           @weights,
           @last_changes,
-          @activation_nodes
+          @activation_nodes,
+          @activation
         ]
       end
 
+      # @param ary [Object]
+      # @return [Object]
       def marshal_load(ary)
         @structure,
         @disable_bias,
@@ -198,140 +383,220 @@ module Ai4r
         @momentum,
         @weights,
         @last_changes,
-        @activation_nodes = ary
-        @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
-        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) }
-        @derivative_propagation_function = lambda { |y| y*(1-y) }
+        @activation_nodes,
+        @activation = ary
+        self.weight_init = :uniform
+        self.activation = @activation || :sigmoid
       end
 
-
       # Propagate error backwards
+      # @param expected_output_values [Object]
+      # @return [Object]
       def backpropagate(expected_output_values)
         check_output_dimension(expected_output_values.length)
         calculate_output_deltas(expected_output_values)
         calculate_internal_deltas
         update_weights
       end
-
+
       # Propagate values forward
+      # @param input_values [Object]
+      # @return [Object]
       def feedforward(input_values)
-        input_values.each_index do |input_index|
+        input_values.each_index do |input_index|
           @activation_nodes.first[input_index] = input_values[input_index]
         end
         @weights.each_index do |n|
-          @structure[n+1].times do |j|
-            sum = 0.0
+          sums = Array.new(@structure[n + 1], 0.0)
+          @structure[n + 1].times do |j|
             @activation_nodes[n].each_index do |i|
-              sum += (@activation_nodes[n][i] * @weights[n][i][j])
+              sums[j] += (@activation_nodes[n][i] * @weights[n][i][j])
             end
-            @activation_nodes[n+1][j] = @propagation_function.call(sum)
           end
-        end
+          if @activation[n] == :softmax
+            values = @propagation_functions[n].call(sums)
+            values.each_index { |j| @activation_nodes[n + 1][j] = values[j] }
+          else
+            sums.each_index do |j|
+              @activation_nodes[n + 1][j] = @propagation_functions[n].call(sums[j])
+            end
+          end
+        end
       end
-
+
       # Initialize neurons structure.
+      # @return [Object]
       def init_activation_nodes
-        @activation_nodes = Array.new(@structure.length) do |n|
+        @activation_nodes = Array.new(@structure.length) do |n|
           Array.new(@structure[n], 1.0)
         end
-        if not disable_bias
-          @activation_nodes[0...-1].each {|layer| layer << 1.0 }
-        end
+        return if disable_bias
+
+        @activation_nodes[0...-1].each { |layer| layer << 1.0 }
       end
-
+
       # Initialize the weight arrays using function specified with the
       # initial_weight_function parameter
+      # @return [Object]
       def init_weights
-        @weights = Array.new(@structure.length-1) do |i|
+        @weights = Array.new(@structure.length - 1) do |i|
           nodes_origin = @activation_nodes[i].length
-          nodes_target = @structure[i+1]
+          nodes_target = @structure[i + 1]
           Array.new(nodes_origin) do |j|
-            Array.new(nodes_target) do |k|
+            Array.new(nodes_target) do |k|
              @initial_weight_function.call(i, j, k)
            end
          end
        end
-      end
+      end
 
-      # Momentum usage need to know how much a weight changed in the
-      # previous training. This method initialize the @last_changes
+      # Momentum usage need to know how much a weight changed in the
+      # previous training. This method initialize the @last_changes
       # structure with 0 values.
+      # @return [Object]
       def init_last_changes
         @last_changes = Array.new(@weights.length) do |w|
-          Array.new(@weights[w].length) do |i|
+          Array.new(@weights[w].length) do |i|
            Array.new(@weights[w][i].length, 0.0)
          end
        end
      end
-
+
       # Calculate deltas for output layer
+      # @param expected_values [Object]
+      # @return [Object]
       def calculate_output_deltas(expected_values)
         output_values = @activation_nodes.last
         output_deltas = []
+        func = @derivative_functions.last
         output_values.each_index do |output_index|
-          error = expected_values[output_index] - output_values[output_index]
-          output_deltas << @derivative_propagation_function.call(
-            output_values[output_index]) * error
+          if @loss_function == :cross_entropy && @activation == :softmax
+            output_deltas << (output_values[output_index] - expected_values[output_index])
+          else
+            error = expected_values[output_index] - output_values[output_index]
+            output_deltas << (func.call(output_values[output_index]) * error)
+          end
         end
         @deltas = [output_deltas]
       end
-
+
       # Calculate deltas for hidden layers
+      # @return [Object]
       def calculate_internal_deltas
         prev_deltas = @deltas.last
-        (@activation_nodes.length-2).downto(1) do |layer_index|
+        (@activation_nodes.length - 2).downto(1) do |layer_index|
           layer_deltas = []
           @activation_nodes[layer_index].each_index do |j|
             error = 0.0
-            @structure[layer_index+1].times do |k|
+            @structure[layer_index + 1].times do |k|
              error += prev_deltas[k] * @weights[layer_index][j][k]
            end
-            layer_deltas[j] = (@derivative_propagation_function.call(
-              @activation_nodes[layer_index][j]) * error)
+            func = @derivative_functions[layer_index - 1]
+            layer_deltas[j] = func.call(@activation_nodes[layer_index][j]) * error
          end
          prev_deltas = layer_deltas
          @deltas.unshift(layer_deltas)
        end
      end
-
+
       # Update weights after @deltas have been calculated.
+      # @return [Object]
       def update_weights
-        (@weights.length-1).downto(0) do |n|
-          @weights[n].each_index do |i|
-            @weights[n][i].each_index do |j|
-              change = @deltas[n][j]*@activation_nodes[n][i]
-              @weights[n][i][j] += ( learning_rate * change ) +
-                (momentum * @last_changes[n][i][j])
+        (@weights.length - 1).downto(0) do |n|
+          @weights[n].each_index do |i|
+            @weights[n][i].each_index do |j|
+              change = @deltas[n][j] * @activation_nodes[n][i]
+              @weights[n][i][j] += ((learning_rate * change) +
+                                    (momentum * @last_changes[n][i][j]))
              @last_changes[n][i][j] = change
            end
          end
        end
      end
-
-      # Calculate quadratic error for a expected output value
+
+      # Calculate quadratic error for an expected output value
       # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+      # @param expected_output [Object]
+      # @return [Object]
       def calculate_error(expected_output)
         output_values = @activation_nodes.last
         error = 0.0
         expected_output.each_index do |output_index|
-          error +=
-            0.5*(output_values[output_index]-expected_output[output_index])**2
+          error +=
+            0.5 * ((output_values[output_index] - expected_output[output_index])**2)
+        end
+        error
+      end
+
+      # Calculate loss for expected/actual vectors according to selected
+      # loss_function (:mse or :cross_entropy).
+      # @param expected [Object]
+      # @param actual [Object]
+      # @return [Object]
+      def calculate_loss(expected, actual)
+        case @loss_function
+        when :cross_entropy
+          epsilon = 1e-12
+          loss = 0.0
+          if @activation == :softmax
+            expected.each_index do |i|
+              p = [[actual[i], epsilon].max, 1 - epsilon].min
+              loss -= expected[i] * Math.log(p)
+            end
+          else
+            expected.each_index do |i|
+              p = [[actual[i], epsilon].max, 1 - epsilon].min
+              loss -= (expected[i] * Math.log(p)) + ((1 - expected[i]) * Math.log(1 - p))
+            end
+          end
+          loss
+        else
+          # Mean squared error
+          error = 0.0
+          expected.each_index do |i|
+            error += 0.5 * ((expected[i] - actual[i])**2)
+          end
+          error
         end
-        return error
       end
-
+
+      # @param inputs [Object]
+      # @return [Object]
       def check_input_dimension(inputs)
-        raise ArgumentError, "Wrong number of inputs. " +
-          "Expected: #{@structure.first}, " +
-          "received: #{inputs}." if inputs != @structure.first
+        return unless inputs != @structure.first
+
+        raise ArgumentError, 'Wrong number of inputs. ' \
+                             "Expected: #{@structure.first}, " \
+                             "received: #{inputs}."
       end
 
+      # @param outputs [Object]
+      # @return [Object]
       def check_output_dimension(outputs)
-        raise ArgumentError, "Wrong number of outputs. " +
-          "Expected: #{@structure.last}, " +
-          "received: #{outputs}." if outputs != @structure.last
+        return unless outputs != @structure.last
+
+        raise ArgumentError, 'Wrong number of outputs. ' \
+                             "Expected: #{@structure.last}, " \
+                             "received: #{outputs}."
       end
-
+
+      parameters_info disable_bias: 'If true, the algorithm will not use ' \
+                                    'bias nodes. False by default.',
+                      initial_weight_function: 'f(n, i, j) must return the initial ' \
+                                               'weight for the conection between the node i in layer n, and ' \
+                                               'node j in layer n+1. By default a random number in [-1, 1) range.',
+                      weight_init: 'Built-in weight initialization strategy (:uniform, :xavier or :he). Default: :uniform',
+                      propagation_function: 'By default: ' \
+                                            'lambda { |x| 1/(1+Math.exp(-1*(x))) }',
+                      derivative_propagation_function: 'Derivative of the propagation ' \
+                                                       'function, based on propagation function output. By default: ' \
+                                                       'lambda { |y| y*(1-y) }, where y=propagation_function(x)',
+                      activation: 'Activation function per layer. Provide a symbol or an array of symbols (:sigmoid, :tanh, :relu or :softmax). Default: :sigmoid',
+                      learning_rate: 'By default 0.25',
+                      momentum: 'By default 0.1. Set this parameter to 0 to disable ' \
+                                'momentum.',
+                      loss_function: 'Loss function used when training (:mse or ' \
+                                     ':cross_entropy). Default: :mse'
     end
   end
 end