ai4r 1.12 → 2.0
This diff compares publicly available package versions as published to their public registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- checksums.yaml +7 -0
- data/README.md +174 -0
- data/examples/classifiers/hyperpipes_data.csv +14 -0
- data/examples/classifiers/hyperpipes_example.rb +22 -0
- data/examples/classifiers/ib1_example.rb +12 -0
- data/examples/classifiers/id3_example.rb +15 -10
- data/examples/classifiers/id3_graphviz_example.rb +17 -0
- data/examples/classifiers/logistic_regression_example.rb +11 -0
- data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
- data/examples/classifiers/naive_bayes_example.rb +12 -13
- data/examples/classifiers/one_r_example.rb +27 -0
- data/examples/classifiers/parameter_tutorial.rb +29 -0
- data/examples/classifiers/prism_nominal_example.rb +15 -0
- data/examples/classifiers/prism_numeric_example.rb +21 -0
- data/examples/classifiers/simple_linear_regression_example.csv +159 -0
- data/examples/classifiers/simple_linear_regression_example.rb +18 -0
- data/examples/classifiers/zero_and_one_r_example.rb +34 -0
- data/examples/classifiers/zero_one_r_data.csv +8 -0
- data/examples/clusterers/clusterer_example.rb +62 -0
- data/examples/clusterers/dbscan_example.rb +17 -0
- data/examples/clusterers/dendrogram_example.rb +17 -0
- data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
- data/examples/clusterers/kmeans_custom_example.rb +26 -0
- data/examples/genetic_algorithm/bitstring_example.rb +41 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
- data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
- data/examples/neural_network/backpropagation_example.rb +49 -48
- data/examples/neural_network/hopfield_example.rb +45 -0
- data/examples/neural_network/patterns_with_base_noise.rb +39 -39
- data/examples/neural_network/patterns_with_noise.rb +41 -39
- data/examples/neural_network/train_epochs_callback.rb +25 -0
- data/examples/neural_network/training_patterns.rb +39 -39
- data/examples/neural_network/transformer_text_classification.rb +78 -0
- data/examples/neural_network/xor_example.rb +23 -22
- data/examples/reinforcement/q_learning_example.rb +10 -0
- data/examples/som/som_data.rb +155 -152
- data/examples/som/som_multi_node_example.rb +12 -13
- data/examples/som/som_single_example.rb +12 -15
- data/examples/transformer/decode_classifier_example.rb +68 -0
- data/examples/transformer/deterministic_example.rb +10 -0
- data/examples/transformer/seq2seq_example.rb +16 -0
- data/lib/ai4r/classifiers/classifier.rb +24 -16
- data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
- data/lib/ai4r/classifiers/ib1.rb +122 -32
- data/lib/ai4r/classifiers/id3.rb +527 -144
- data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
- data/lib/ai4r/classifiers/naive_bayes.rb +112 -48
- data/lib/ai4r/classifiers/one_r.rb +112 -44
- data/lib/ai4r/classifiers/prism.rb +167 -76
- data/lib/ai4r/classifiers/random_forest.rb +72 -0
- data/lib/ai4r/classifiers/simple_linear_regression.rb +143 -0
- data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
- data/lib/ai4r/classifiers/votes.rb +57 -0
- data/lib/ai4r/classifiers/zero_r.rb +71 -30
- data/lib/ai4r/clusterers/average_linkage.rb +46 -27
- data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
- data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
- data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
- data/lib/ai4r/clusterers/clusterer.rb +28 -24
- data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
- data/lib/ai4r/clusterers/dbscan.rb +134 -0
- data/lib/ai4r/clusterers/diana.rb +75 -49
- data/lib/ai4r/clusterers/k_means.rb +309 -72
- data/lib/ai4r/clusterers/median_linkage.rb +49 -33
- data/lib/ai4r/clusterers/single_linkage.rb +196 -88
- data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +63 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
- data/lib/ai4r/data/data_set.rb +229 -100
- data/lib/ai4r/data/parameterizable.rb +31 -25
- data/lib/ai4r/data/proximity.rb +72 -50
- data/lib/ai4r/data/statistics.rb +46 -35
- data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
- data/lib/ai4r/experiment/split.rb +39 -0
- data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
- data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
- data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
- data/lib/ai4r/neural_network/activation_functions.rb +37 -0
- data/lib/ai4r/neural_network/backpropagation.rb +419 -143
- data/lib/ai4r/neural_network/hopfield.rb +175 -58
- data/lib/ai4r/neural_network/transformer.rb +194 -0
- data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
- data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
- data/lib/ai4r/reinforcement/q_learning.rb +51 -0
- data/lib/ai4r/search/a_star.rb +76 -0
- data/lib/ai4r/search/bfs.rb +50 -0
- data/lib/ai4r/search/dfs.rb +50 -0
- data/lib/ai4r/search/mcts.rb +118 -0
- data/lib/ai4r/search.rb +12 -0
- data/lib/ai4r/som/distance_metrics.rb +29 -0
- data/lib/ai4r/som/layer.rb +28 -17
- data/lib/ai4r/som/node.rb +61 -32
- data/lib/ai4r/som/som.rb +158 -41
- data/lib/ai4r/som/two_phase_layer.rb +21 -25
- data/lib/ai4r/version.rb +3 -0
- data/lib/ai4r.rb +58 -27
- metadata +117 -106
- data/README.rdoc +0 -44
- data/test/classifiers/hyperpipes_test.rb +0 -84
- data/test/classifiers/ib1_test.rb +0 -78
- data/test/classifiers/id3_test.rb +0 -208
- data/test/classifiers/multilayer_perceptron_test.rb +0 -79
- data/test/classifiers/naive_bayes_test.rb +0 -43
- data/test/classifiers/one_r_test.rb +0 -62
- data/test/classifiers/prism_test.rb +0 -85
- data/test/classifiers/zero_r_test.rb +0 -50
- data/test/clusterers/average_linkage_test.rb +0 -51
- data/test/clusterers/bisecting_k_means_test.rb +0 -66
- data/test/clusterers/centroid_linkage_test.rb +0 -53
- data/test/clusterers/complete_linkage_test.rb +0 -57
- data/test/clusterers/diana_test.rb +0 -69
- data/test/clusterers/k_means_test.rb +0 -100
- data/test/clusterers/median_linkage_test.rb +0 -53
- data/test/clusterers/single_linkage_test.rb +0 -122
- data/test/clusterers/ward_linkage_test.rb +0 -53
- data/test/clusterers/weighted_average_linkage_test.rb +0 -53
- data/test/data/data_set_test.rb +0 -96
- data/test/data/proximity_test.rb +0 -81
- data/test/data/statistics_test.rb +0 -65
- data/test/experiment/classifier_evaluator_test.rb +0 -76
- data/test/genetic_algorithm/chromosome_test.rb +0 -57
- data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
- data/test/neural_network/backpropagation_test.rb +0 -82
- data/test/neural_network/hopfield_test.rb +0 -72
- data/test/som/som_test.rb +0 -97
data/lib/ai4r/neural_network/backpropagation.rb:

```diff
@@ -1,162 +1,354 @@
+# frozen_string_literal: true
+
 # Author:: Sergio Fierens
 # License:: MPL 1.1
 # Project:: ai4r
-# Url::
+# Url:: https://github.com/SergioFierens/ai4r
 #
-# You can redistribute it and/or modify it under the terms of
-# the Mozilla Public License version 1.1 as published by the
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1 as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

-
+require_relative '../data/parameterizable'
+require_relative 'activation_functions'
+require_relative 'weight_initializations'

 module Ai4r
-
-  #
-  #
-  #
+  # Artificial Neural Networks are mathematical or computational models based on
+  # biological neural networks.
+  #
   # More about neural networks:
-  #
+  #
   # * http://en.wikipedia.org/wiki/Artificial_neural_network
   #
   module NeuralNetwork
-
     # = Introduction
-    #
+    #
     # This is an implementation of a multilayer perceptron network, using
     # the backpropagation algorithm for learning.
-    #
-    # Backpropagation is a supervised learning technique (described
-    # by Paul Werbos in 1974, and further developed by David E.
+    #
+    # Backpropagation is a supervised learning technique (described
+    # by Paul Werbos in 1974, and further developed by David E.
     # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
-    #
+    #
     # = Features
-    #
+    #
     # * Support for any network architecture (number of layers and neurons)
     # * Configurable propagation function
-    # * Optional usage of bias
+    # * Optional usage of bias
     # * Configurable momentum
     # * Configurable learning rate
     # * Configurable initial weight function
     # * 100% ruby code, no external dependency
-    #
+    #
     # = Parameters
-    #
+    #
     # Use class method get_parameters_info to obtain details on the algorithm
     # parameters. Use set_parameters to set values for this parameters.
-    #
-    # * :disable_bias => If true, the
+    #
+    # * :disable_bias => If true, the algorithm will not use bias nodes.
     # False by default.
-    # * :initial_weight_function => f(n, i, j) must return the initial
-    # weight for the conection between the node i in layer n, and node j in
+    # * :initial_weight_function => f(n, i, j) must return the initial
+    # weight for the conection between the node i in layer n, and node j in
     # layer n+1. By default a random number in [-1, 1) range.
-    # * :propagation_function => By default:
+    # * :propagation_function => By default:
     # lambda { |x| 1/(1+Math.exp(-1*(x))) }
-    # * :derivative_propagation_function => Derivative of the propagation
-    # function, based on propagation function output.
+    # * :derivative_propagation_function => Derivative of the propagation
+    # function, based on propagation function output.
     # By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
-    # * :
+    # * :activation => Built-in activation name (:sigmoid, :tanh or :relu).
+    # Selecting this overrides propagation_function and derivative_propagation_function.
+    # Default: :sigmoid
+    # * :learning_rate => By default 0.25
     # * :momentum => By default 0.1. Set this parameter to 0 to disable
     # momentum
-    #
+    #
     # = How to use it
-    #
+    #
     # # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
     # # and 2 outputs
-    # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+    # net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
     #
-    # # Train the network
+    # # Train the network
     # 1000.times do |i|
     # net.train(example[i], result[i])
     # end
-    #
+    #
     # # Use it: Evaluate data with the trained network
-    # net.eval([12, 48, 12, 25])
-    # => [0.86, 0.01]
-    #
+    # net.eval([12, 48, 12, 25])
+    # => [0.86, 0.01]
+    #
     # More about multilayer perceptron neural networks and backpropagation:
-    #
+    #
     # * http://en.wikipedia.org/wiki/Backpropagation
     # * http://en.wikipedia.org/wiki/Multilayer_perceptron
-    #
+    #
     # = About the project
     # Author:: Sergio Fierens
     # License:: MPL 1.1
-    # Url::
+    # Url:: https://github.com/SergioFierens/ai4r
     class Backpropagation
-
       include Ai4r::Data::Parameterizable
-
-      parameters_info :disable_bias => "If true, the alforithm will not use "+
-        "bias nodes. False by default.",
-        :initial_weight_function => "f(n, i, j) must return the initial "+
-        "weight for the conection between the node i in layer n, and "+
-        "node j in layer n+1. By default a random number in [-1, 1) range.",
-        :propagation_function => "By default: " +
-        "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
-        :derivative_propagation_function => "Derivative of the propagation "+
-        "function, based on propagation function output. By default: " +
-        "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
-        :learning_rate => "By default 0.25",
-        :momentum => "By default 0.1. Set this parameter to 0 to disable "+
-        "momentum."
-
+
       attr_accessor :structure, :weights, :activation_nodes, :last_changes
-
+
+      # When the activation parameter changes, update internal lambdas for each
+      # layer. Accepts a single symbol or an array of symbols (one for each
+      # layer except the input layer).
+      # @param symbols [Object]
+      # @return [Object]
+      def activation=(symbols)
+        symbols = [symbols] unless symbols.is_a?(Array)
+        layer_count = @structure.length - 1
+        if symbols.length == 1
+          symbols = Array.new(layer_count, symbols.first)
+        elsif symbols.length != layer_count
+          raise ArgumentError, "Activation array size must match number of layers (#{layer_count})"
+        end
+        @activation = symbols
+        @propagation_functions = @activation.map do |a|
+          Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[a] ||
+            Ai4r::NeuralNetwork::ActivationFunctions::FUNCTIONS[:sigmoid]
+        end
+        @derivative_functions = @activation.map do |a|
+          Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[a] ||
+            Ai4r::NeuralNetwork::ActivationFunctions::DERIVATIVES[:sigmoid]
+        end
+      end
+
+      # @return [Object]
+      def activation
+        if @activation.is_a?(Array)
+          if @set_by_loss || (@loss_function == :cross_entropy && @activation_overridden)
+            @activation.first
+          else
+            @activation
+          end
+        else
+          @activation
+        end
+      end
+
+      # @param symbol [Object]
+      # @return [Object]
+      def weight_init=(symbol)
+        @weight_init = symbol
+        @initial_weight_function = case symbol
+                                   when :xavier
+                                     Ai4r::NeuralNetwork::WeightInitializations.xavier(@structure)
+                                   when :he
+                                     Ai4r::NeuralNetwork::WeightInitializations.he(@structure)
+                                   else
+                                     Ai4r::NeuralNetwork::WeightInitializations.uniform
+                                   end
+      end
+
+      # @param symbol [Object]
+      # @return [Object]
+      def loss_function=(symbol)
+        @loss_function = symbol
+        return unless symbol == :cross_entropy && !@activation_overridden && !@custom_propagation
+
+        @set_by_loss = true
+        self.activation = :softmax
+        @activation_overridden = false
+      end
+
       # Creates a new network specifying the its architecture.
       # E.g.
-      #
+      #
       # net = Backpropagation.new([4, 3, 2]) # 4 inputs
-      # # 1 hidden layer with 3 neurons,
-      # # 2 outputs
+      # # 1 hidden layer with 3 neurons,
+      # # 2 outputs
       # net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
-      # # 2 hidden layer with 3 neurons each,
-      # # 4 outputs
+      # # 2 hidden layer with 3 neurons each,
+      # # 4 outputs
       # net = Backpropagation.new([2, 1]) # 2 inputs
       # # No hidden layer
-      # # 1 output
-
+      # # 1 output
+      # @param network_structure [Object]
+      # @param activation [Object]
+      # @param weight_init [Object]
+      # @return [Object]
+      def initialize(network_structure, activation = :sigmoid, weight_init = :uniform)
         @structure = network_structure
-
-        @
-        @
+        self.weight_init = weight_init
+        @custom_propagation = false
+        @set_by_loss = true
+        self.activation = activation
+        @activation_overridden = (activation != :sigmoid)
+        @set_by_loss = false
         @disable_bias = false
         @learning_rate = 0.25
         @momentum = 0.1
+        @loss_function = :mse
       end

-      # Evaluates the input.
-      # E.g.
-      # net = Backpropagation.new([4, 3, 2])
       # net.eval([25, 32.3, 12.8, 1.5])
       # # => [0.83, 0.03]
+      # @param input_values [Object]
+      # @return [Object]
       def eval(input_values)
         check_input_dimension(input_values.length)
-        init_network
+        init_network unless @weights
         feedforward(input_values)
-
+        @activation_nodes.last.clone
+      end
+
+      # Evaluates the input and returns most active node
+      # E.g.
+      # net = Backpropagation.new([4, 3, 2])
+      # net.eval_result([25, 32.3, 12.8, 1.5])
+      # # eval gives [0.83, 0.03]
+      # # => 0
+      # @param input_values [Object]
+      # @return [Object]
+      def eval_result(input_values)
+        result = eval(input_values)
+        result.index(result.max)
       end
-
+
       # This method trains the network using the backpropagation algorithm.
-      #
+      #
       # input: Networks input
-      #
+      #
       # output: Expected output for the given input.
       #
-      # This method returns the
-      #
+      # This method returns the training loss according to +loss_function+.
+      # @param inputs [Object]
+      # @param outputs [Object]
+      # @return [Object]
       def train(inputs, outputs)
         eval(inputs)
         backpropagate(outputs)
-
+        calculate_loss(outputs, @activation_nodes.last)
+      end
+
+      # Train a list of input/output pairs and return average loss.
+      # @param batch_inputs [Object]
+      # @param batch_outputs [Object]
+      # @return [Object]
+      def train_batch(batch_inputs, batch_outputs)
+        if batch_inputs.length != batch_outputs.length
+          raise ArgumentError,
+                'Inputs and outputs size mismatch'
+        end
+
+        batch_size = batch_inputs.length
+        init_network unless @weights
+
+        accumulated_changes = Array.new(@weights.length) do |w|
+          Array.new(@weights[w].length) do |i|
+            Array.new(@weights[w][i].length, 0.0)
+          end
+        end
+
+        sum_error = 0.0
+        batch_inputs.each_index do |idx|
+          inputs = batch_inputs[idx]
+          outputs = batch_outputs[idx]
+          eval(inputs)
+          calculate_output_deltas(outputs)
+          calculate_internal_deltas
+
+          (@weights.length - 1).downto(0) do |n|
+            @weights[n].each_index do |i|
+              @weights[n][i].each_index do |j|
+                change = @deltas[n][j] * @activation_nodes[n][i]
+                accumulated_changes[n][i][j] += change
+              end
+            end
+          end
+
+          sum_error += calculate_loss(outputs, @activation_nodes.last)
+        end
+
+        (@weights.length - 1).downto(0) do |n|
+          @weights[n].each_index do |i|
+            @weights[n][i].each_index do |j|
+              avg_change = accumulated_changes[n][i][j] / batch_size.to_f
+              @weights[n][i][j] += (learning_rate * avg_change) + (momentum * @last_changes[n][i][j])
+              @last_changes[n][i][j] = avg_change
+            end
+          end
+        end
+
+        sum_error / batch_size.to_f
       end
-
-      #
+
+      # Train for a number of epochs over the dataset. Optionally define a batch size.
+      # Data can be shuffled between epochs passing +shuffle: true+ (default).
+      # Use +random_seed+ to make shuffling deterministic.
+      # Returns an array with the average loss of each epoch.
+      # @return [Object]
+      def train_epochs(data_inputs, data_outputs, epochs:, batch_size: 1,
+                       early_stopping_patience: nil, min_delta: 0.0,
+                       shuffle: true, random_seed: nil, &block)
+        if data_inputs.length != data_outputs.length
+          raise ArgumentError,
+                'Inputs and outputs size mismatch'
+        end
+
+        losses = []
+        best_loss = Float::INFINITY
+        patience = early_stopping_patience
+        patience_counter = 0
+        rng = random_seed.nil? ? Random.new : Random.new(random_seed)
+        epochs.times do |epoch|
+          epoch_error = 0.0
+          epoch_inputs = data_inputs
+          epoch_outputs = data_outputs
+          if shuffle
+            indices = (0...data_inputs.length).to_a.shuffle(random: rng)
+            epoch_inputs = data_inputs.values_at(*indices)
+            epoch_outputs = data_outputs.values_at(*indices)
+          end
+          index = 0
+          while index < epoch_inputs.length
+            batch_in = epoch_inputs[index, batch_size]
+            batch_out = epoch_outputs[index, batch_size]
+            batch_error = train_batch(batch_in, batch_out)
+            epoch_error += batch_error * batch_in.length
+            index += batch_size
+          end
+          epoch_loss = epoch_error / data_inputs.length.to_f
+          losses << epoch_loss
+          if block
+            if block.arity >= 3
+              correct = 0
+              data_inputs.each_index do |i|
+                output = eval(data_inputs[i])
+                predicted = output.index(output.max)
+                expected = data_outputs[i].index(data_outputs[i].max)
+                correct += 1 if predicted == expected
+              end
+              accuracy = correct.to_f / data_inputs.length
+              block.call(epoch, epoch_loss, accuracy)
+            else
+              block.call(epoch, epoch_loss)
+            end
+          end
+          if patience
+            if best_loss - epoch_loss > min_delta
+              best_loss = epoch_loss
+              patience_counter = 0
+            else
+              patience_counter += 1
+              break if patience_counter >= patience
+            end
+          end
+        end
+        losses
+      end
+
+      # Initialize (or reset) activation nodes and weights, with the
       # provided net structure and parameters.
+      # @return [Object]
       def init_network
         init_activation_nodes
         init_weights
         init_last_changes
-
+        self
       end

       protected
```
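The hunk above carries most of the new 2.0 training API: the constructor now accepts activation and weight-initialization symbols, `loss_function=` selects the training loss, and `train_batch` / `train_epochs` (with per-epoch shuffling, an optional callback and early stopping) join the original `train`. The sketch below is only an illustration inferred from those added signatures; the one-hot XOR data and every literal value are made up.

```ruby
require 'ai4r'

# Hypothetical one-hot XOR data, just to exercise the API added in this hunk.
inputs  = [[0, 0], [0, 1], [1, 0], [1, 1]]
outputs = [[1, 0], [0, 1], [0, 1], [1, 0]]

# New constructor arguments: activation (:sigmoid, :tanh, :relu) and
# weight initialization (:uniform, :xavier, :he).
net = Ai4r::NeuralNetwork::Backpropagation.new([2, 3, 2], :tanh, :xavier)

# train_epochs shuffles every epoch by default and returns one average loss
# per epoch; a block taking three parameters also receives the accuracy.
losses = net.train_epochs(inputs, outputs,
                          epochs: 500, batch_size: 2,
                          early_stopping_patience: 25, min_delta: 1e-4,
                          random_seed: 42) do |epoch, loss, accuracy|
  puts "epoch #{epoch}: loss=#{loss.round(4)} acc=#{accuracy.round(2)}" if (epoch % 100).zero?
end

puts "epochs run: #{losses.length}"
puts net.eval([1, 0]).inspect  # raw output vector
puts net.eval_result([1, 0])   # index of the most active output node
```

Passing `random_seed` makes the per-epoch shuffle reproducible, which helps when comparing activation or weight-initialization choices.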
```diff
@@ -168,6 +360,7 @@ module Ai4r
       # * propagation_function
       # * derivative_propagation_function
       # you must restore their values manually after loading the instance.
+      # @return [Object]
       def marshal_dump
         [
           @structure,
```
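As the context lines above note, custom propagation lambdas are not serialized, so they must be restored by hand after loading; the built-in settings, by contrast, survive a round trip through Ruby's standard Marshal (the next hunk shows `marshal_load` resetting `weight_init` and `activation`). A minimal, assumed round trip, where the trained `net` and the file name are placeholders:

```ruby
# Persist a trained network. Custom propagation_function /
# derivative_propagation_function lambdas are not dumped, so a reloaded
# instance falls back to the built-in activation and :uniform weight_init.
File.binwrite('backprop_net.dump', Marshal.dump(net))

restored = Marshal.load(File.binread('backprop_net.dump'))
restored.eval([1, 0])  # structure, weights and last_changes survive
```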
```diff
@@ -176,151 +369,234 @@ module Ai4r
           @momentum,
           @weights,
           @last_changes,
-          @activation_nodes
+          @activation_nodes,
+          @activation
         ]
-
-
-      def marshal_load(ary)
-        @structure,
-        @disable_bias,
-        @learning_rate,
-        @momentum,
-        @weights,
-        @last_changes,
-        @activation_nodes = ary
-        @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
-        @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
-        @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
-      end
+      end

+      # @param ary [Object]
+      # @return [Object]
+      def marshal_load(ary)
+        @structure,
+        @disable_bias,
+        @learning_rate,
+        @momentum,
+        @weights,
+        @last_changes,
+        @activation_nodes,
+        @activation = ary
+        self.weight_init = :uniform
+        self.activation = @activation || :sigmoid
+      end

       # Propagate error backwards
+      # @param expected_output_values [Object]
+      # @return [Object]
       def backpropagate(expected_output_values)
         check_output_dimension(expected_output_values.length)
         calculate_output_deltas(expected_output_values)
         calculate_internal_deltas
         update_weights
       end
-
+
       # Propagate values forward
+      # @param input_values [Object]
+      # @return [Object]
       def feedforward(input_values)
-        input_values.each_index do |input_index|
+        input_values.each_index do |input_index|
           @activation_nodes.first[input_index] = input_values[input_index]
         end
         @weights.each_index do |n|
-          @structure[n+1].
-
+          sums = Array.new(@structure[n + 1], 0.0)
+          @structure[n + 1].times do |j|
             @activation_nodes[n].each_index do |i|
-
+              sums[j] += (@activation_nodes[n][i] * @weights[n][i][j])
+            end
+          end
+          if @activation[n] == :softmax
+            values = @propagation_functions[n].call(sums)
+            values.each_index { |j| @activation_nodes[n + 1][j] = values[j] }
+          else
+            sums.each_index do |j|
+              @activation_nodes[n + 1][j] = @propagation_functions[n].call(sums[j])
             end
-            @activation_nodes[n+1][j] = @propagation_function.call(sum)
           end
-        end
+        end
       end
-
+
       # Initialize neurons structure.
+      # @return [Object]
       def init_activation_nodes
-        @activation_nodes = Array.new(@structure.length) do |n|
+        @activation_nodes = Array.new(@structure.length) do |n|
           Array.new(@structure[n], 1.0)
         end
-        if
-
-
+        return if disable_bias
+
+        @activation_nodes[0...-1].each { |layer| layer << 1.0 }
       end
-
+
       # Initialize the weight arrays using function specified with the
       # initial_weight_function parameter
+      # @return [Object]
       def init_weights
-        @weights = Array.new(@structure.length-1) do |i|
+        @weights = Array.new(@structure.length - 1) do |i|
           nodes_origin = @activation_nodes[i].length
-          nodes_target = @structure[i+1]
+          nodes_target = @structure[i + 1]
           Array.new(nodes_origin) do |j|
-            Array.new(nodes_target) do |k|
+            Array.new(nodes_target) do |k|
               @initial_weight_function.call(i, j, k)
             end
           end
         end
-      end
+      end

-      # Momentum usage need to know how much a weight changed in the
-      # previous training. This method initialize the @last_changes
+      # Momentum usage need to know how much a weight changed in the
+      # previous training. This method initialize the @last_changes
       # structure with 0 values.
+      # @return [Object]
       def init_last_changes
         @last_changes = Array.new(@weights.length) do |w|
-          Array.new(@weights[w].length) do |i|
+          Array.new(@weights[w].length) do |i|
             Array.new(@weights[w][i].length, 0.0)
           end
         end
       end
-
+
       # Calculate deltas for output layer
+      # @param expected_values [Object]
+      # @return [Object]
       def calculate_output_deltas(expected_values)
         output_values = @activation_nodes.last
         output_deltas = []
+        func = @derivative_functions.last
         output_values.each_index do |output_index|
-
-
-
+          if @loss_function == :cross_entropy && @activation == :softmax
+            output_deltas << (output_values[output_index] - expected_values[output_index])
+          else
+            error = expected_values[output_index] - output_values[output_index]
+            output_deltas << (func.call(output_values[output_index]) * error)
+          end
         end
         @deltas = [output_deltas]
       end
-
+
       # Calculate deltas for hidden layers
+      # @return [Object]
       def calculate_internal_deltas
         prev_deltas = @deltas.last
-        (@activation_nodes.length-2).downto(1) do |layer_index|
+        (@activation_nodes.length - 2).downto(1) do |layer_index|
           layer_deltas = []
           @activation_nodes[layer_index].each_index do |j|
             error = 0.0
-            @structure[layer_index+1].times do |k|
+            @structure[layer_index + 1].times do |k|
               error += prev_deltas[k] * @weights[layer_index][j][k]
             end
-
-
+            func = @derivative_functions[layer_index - 1]
+            layer_deltas[j] = func.call(@activation_nodes[layer_index][j]) * error
           end
           prev_deltas = layer_deltas
           @deltas.unshift(layer_deltas)
         end
       end
-
+
       # Update weights after @deltas have been calculated.
+      # @return [Object]
       def update_weights
-        (@weights.length-1).downto(0) do |n|
-          @weights[n].each_index do |i|
-            @weights[n][i].each_index do |j|
-              change = @deltas[n][j]
-              @weights[n][i][j] += (
-                momentum * @last_changes[n][i][j])
+        (@weights.length - 1).downto(0) do |n|
+          @weights[n].each_index do |i|
+            @weights[n][i].each_index do |j|
+              change = @deltas[n][j] * @activation_nodes[n][i]
+              @weights[n][i][j] += ((learning_rate * change) +
+                                    (momentum * @last_changes[n][i][j]))
               @last_changes[n][i][j] = change
             end
           end
         end
       end
-
-      # Calculate quadratic error for
+
+      # Calculate quadratic error for an expected output value
       # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+      # @param expected_output [Object]
+      # @return [Object]
       def calculate_error(expected_output)
         output_values = @activation_nodes.last
         error = 0.0
         expected_output.each_index do |output_index|
-          error +=
-            0.5*(output_values[output_index]-expected_output[output_index])**2
+          error +=
+            0.5 * ((output_values[output_index] - expected_output[output_index])**2)
         end
-
+        error
       end
-
+
+      # Calculate loss for expected/actual vectors according to selected
+      # loss_function (:mse or :cross_entropy).
+      # @param expected [Object]
+      # @param actual [Object]
+      # @return [Object]
+      def calculate_loss(expected, actual)
+        case @loss_function
+        when :cross_entropy
+          epsilon = 1e-12
+          loss = 0.0
+          if @activation == :softmax
+            expected.each_index do |i|
+              p = [[actual[i], epsilon].max, 1 - epsilon].min
+              loss -= expected[i] * Math.log(p)
+            end
+          else
+            expected.each_index do |i|
+              p = [[actual[i], epsilon].max, 1 - epsilon].min
+              loss -= (expected[i] * Math.log(p)) + ((1 - expected[i]) * Math.log(1 - p))
+            end
+          end
+          loss
+        else
+          # Mean squared error
+          error = 0.0
+          expected.each_index do |i|
+            error += 0.5 * ((expected[i] - actual[i])**2)
+          end
+          error
+        end
+      end
+
+      # @param inputs [Object]
+      # @return [Object]
       def check_input_dimension(inputs)
-
-
-
+        return unless inputs != @structure.first
+
+        raise ArgumentError, 'Wrong number of inputs. ' \
+                             "Expected: #{@structure.first}, " \
+                             "received: #{inputs}."
       end

+      # @param outputs [Object]
+      # @return [Object]
       def check_output_dimension(outputs)
-
-
-
+        return unless outputs != @structure.last
+
+        raise ArgumentError, 'Wrong number of outputs. ' \
+                             "Expected: #{@structure.last}, " \
+                             "received: #{outputs}."
       end
-
+
+      parameters_info disable_bias: 'If true, the algorithm will not use ' \
+                                    'bias nodes. False by default.',
+                      initial_weight_function: 'f(n, i, j) must return the initial ' \
+                                               'weight for the conection between the node i in layer n, and ' \
+                                               'node j in layer n+1. By default a random number in [-1, 1) range.',
+                      weight_init: 'Built-in weight initialization strategy (:uniform, :xavier or :he). Default: :uniform',
+                      propagation_function: 'By default: ' \
+                                            'lambda { |x| 1/(1+Math.exp(-1*(x))) }',
+                      derivative_propagation_function: 'Derivative of the propagation ' \
+                                                       'function, based on propagation function output. By default: ' \
+                                                       'lambda { |y| y*(1-y) }, where y=propagation_function(x)',
+                      activation: 'Activation function per layer. Provide a symbol or an array of symbols (:sigmoid, :tanh, :relu or :softmax). Default: :sigmoid',
+                      learning_rate: 'By default 0.25',
+                      momentum: 'By default 0.1. Set this parameter to 0 to disable ' \
+                                'momentum.',
+                      loss_function: 'Loss function used when training (:mse or ' \
+                                     ':cross_entropy). Default: :mse'
     end
   end
 end
```
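One behavioural consequence of the loss handling added in this hunk: assigning `:cross_entropy` switches the network's activation to `:softmax` (unless an activation or custom propagation function was set explicitly), the added output-delta branch uses `output - expected` when cross-entropy is paired with softmax, and `train` then returns a cross-entropy loss rather than the quadratic error. A hedged sketch of that configuration, with made-up three-class data:

```ruby
require 'ai4r'

# Three-class classifier: cross-entropy pairs with a softmax output,
# which loss_function= selects automatically per the diff above.
net = Ai4r::NeuralNetwork::Backpropagation.new([4, 6, 3])
net.loss_function = :cross_entropy

sample  = [5.1, 3.5, 1.4, 0.2]  # placeholder feature vector
one_hot = [1, 0, 0]             # class 0

loss  = net.train(sample, one_hot)  # training loss per the selected loss_function
probs = net.eval(sample)            # softmax outputs, summing to ~1.0
puts "loss=#{loss.round(4)} probs=#{probs.map { |p| p.round(3) }.inspect}"
puts "predicted class: #{net.eval_result(sample)}"
```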