grydra 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENCE +34 -0
- data/README.md +1381 -80
- data/lib/grydra/activations.rb +70 -0
- data/lib/grydra/callbacks.rb +212 -0
- data/lib/grydra/documentation.rb +84 -0
- data/lib/grydra/initializers.rb +14 -0
- data/lib/grydra/layers/base.rb +10 -0
- data/lib/grydra/layers/conv.rb +106 -0
- data/lib/grydra/layers/dense.rb +17 -0
- data/lib/grydra/layers/lstm.rb +139 -0
- data/lib/grydra/losses.rb +119 -0
- data/lib/grydra/metrics.rb +75 -0
- data/lib/grydra/networks/easy_network.rb +161 -0
- data/lib/grydra/networks/main_network.rb +38 -0
- data/lib/grydra/networks/neural_network.rb +203 -0
- data/lib/grydra/networks/neuron.rb +80 -0
- data/lib/grydra/normalization.rb +77 -0
- data/lib/grydra/optimizers.rb +162 -0
- data/lib/grydra/preprocessing/data.rb +48 -0
- data/lib/grydra/preprocessing/pca.rb +132 -0
- data/lib/grydra/preprocessing/text.rb +62 -0
- data/lib/grydra/regularization.rb +19 -0
- data/lib/grydra/training/cross_validation.rb +35 -0
- data/lib/grydra/training/hyperparameter_search.rb +46 -0
- data/lib/grydra/utils/examples.rb +183 -0
- data/lib/grydra/utils/persistence.rb +94 -0
- data/lib/grydra/utils/visualization.rb +105 -0
- data/lib/grydra/version.rb +3 -0
- data/lib/grydra.rb +162 -2
- metadata +96 -17
- data/lib/gr/core.rb +0 -1926
- data/lib/gr/version.rb +0 -3

data/lib/grydra/losses.rb

@@ -0,0 +1,119 @@
+module GRYDRA
+  module Losses
+    # Mean Squared Error Loss
+    def self.mse(predictions, targets)
+      n = predictions.size
+      sum = predictions.zip(targets).map { |p, t| (p - t)**2 }.sum
+      sum / n.to_f
+    end
+
+    def self.mse_derivative(predictions, targets)
+      predictions.zip(targets).map { |p, t| 2 * (p - t) / predictions.size }
+    end
+
+    # Mean Absolute Error Loss
+    def self.mae(predictions, targets)
+      n = predictions.size
+      sum = predictions.zip(targets).map { |p, t| (p - t).abs }.sum
+      sum / n.to_f
+    end
+
+    def self.mae_derivative(predictions, targets)
+      predictions.zip(targets).map { |p, t| p > t ? 1.0 / predictions.size : -1.0 / predictions.size }
+    end
+
+    # Binary Cross-Entropy Loss
+    def self.binary_crossentropy(predictions, targets, epsilon: 1e-7)
+      predictions = predictions.map { |p| [[p, epsilon].max, 1 - epsilon].min }
+      n = predictions.size
+      sum = predictions.zip(targets).map do |p, t|
+        -(t * Math.log(p) + (1 - t) * Math.log(1 - p))
+      end.sum
+      sum / n.to_f
+    end
+
+    def self.binary_crossentropy_derivative(predictions, targets, epsilon: 1e-7)
+      predictions = predictions.map { |p| [[p, epsilon].max, 1 - epsilon].min }
+      predictions.zip(targets).map do |p, t|
+        (p - t) / (p * (1 - p) * predictions.size)
+      end
+    end
+
+    # Categorical Cross-Entropy Loss
+    def self.categorical_crossentropy(predictions, targets, epsilon: 1e-7)
+      predictions = predictions.map { |p| [p, epsilon].max }
+      n = predictions.size
+      sum = predictions.zip(targets).map do |pred_row, target_row|
+        pred_row.zip(target_row).map { |p, t| -t * Math.log(p) }.sum
+      end.sum
+      sum / n.to_f
+    end
+
+    # Huber Loss (robust to outliers)
+    def self.huber(predictions, targets, delta: 1.0)
+      n = predictions.size
+      sum = predictions.zip(targets).map do |p, t|
+        error = (p - t).abs
+        if error <= delta
+          0.5 * error**2
+        else
+          delta * (error - 0.5 * delta)
+        end
+      end.sum
+      sum / n.to_f
+    end
+
+    def self.huber_derivative(predictions, targets, delta: 1.0)
+      predictions.zip(targets).map do |p, t|
+        error = p - t
+        if error.abs <= delta
+          error / predictions.size
+        else
+          delta * (error > 0 ? 1 : -1) / predictions.size
+        end
+      end
+    end
+
+    # Hinge Loss (for SVM-style classification)
+    def self.hinge(predictions, targets)
+      n = predictions.size
+      sum = predictions.zip(targets).map do |p, t|
+        [0, 1 - t * p].max
+      end.sum
+      sum / n.to_f
+    end
+
+    def self.hinge_derivative(predictions, targets)
+      predictions.zip(targets).map do |p, t|
+        (1 - t * p) > 0 ? -t / predictions.size : 0
+      end
+    end
+
+    # Log-Cosh Loss (smooth approximation of MAE)
+    def self.log_cosh(predictions, targets)
+      n = predictions.size
+      sum = predictions.zip(targets).map do |p, t|
+        x = p - t
+        Math.log(Math.cosh(x))
+      end.sum
+      sum / n.to_f
+    end
+
+    def self.log_cosh_derivative(predictions, targets)
+      predictions.zip(targets).map do |p, t|
+        x = p - t
+        Math.tanh(x) / predictions.size
+      end
+    end
+
+    # Quantile Loss (for quantile regression)
+    def self.quantile(predictions, targets, quantile: 0.5)
+      n = predictions.size
+      sum = predictions.zip(targets).map do |p, t|
+        error = t - p
+        error > 0 ? quantile * error : (quantile - 1) * error
+      end.sum
+      sum / n.to_f
+    end
+  end
+end
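For orientation, a minimal sketch of how the GRYDRA::Losses functions above might be called; the sample arrays and the delta value are made-up illustrations, not values shipped with the package:

# Hypothetical regression-style predictions and 0/1 targets.
predictions = [0.9, 0.2, 0.7]
targets     = [1, 0, 1]

GRYDRA::Losses.mse(predictions, targets)                   # mean squared error
GRYDRA::Losses.mae(predictions, targets)                   # mean absolute error
GRYDRA::Losses.huber(predictions, targets, delta: 1.0)     # quadratic near zero, linear in the tails
GRYDRA::Losses.binary_crossentropy(predictions, targets)   # probabilities vs. 0/1 labels, clipped by epsilon
GRYDRA::Losses.mse_derivative(predictions, targets)        # per-element gradient used during backprop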

data/lib/grydra/metrics.rb

@@ -0,0 +1,75 @@
+module GRYDRA
+  module Metrics
+    # Mean Squared Error
+    def self.mse(predictions, actuals)
+      n = predictions.size
+      sum = predictions.zip(actuals).map { |p, r| (p - r)**2 }.sum
+      sum / n.to_f
+    end
+
+    # Mean Absolute Error
+    def self.mae(predictions, actuals)
+      n = predictions.size
+      sum = predictions.zip(actuals).map { |p, r| (p - r).abs }.sum
+      sum / n.to_f
+    end
+
+    # Precision
+    def self.precision(tp, fp)
+      return 0.0 if (tp + fp).zero?
+      tp.to_f / (tp + fp)
+    end
+
+    # Recall
+    def self.recall(tp, fn)
+      return 0.0 if (tp + fn).zero?
+      tp.to_f / (tp + fn)
+    end
+
+    # F1 Score
+    def self.f1(precision, recall)
+      return 0.0 if (precision + recall).zero?
+      2 * (precision * recall) / (precision + recall)
+    end
+
+    # Confusion Matrix
+    def self.confusion_matrix(predictions, actuals, threshold = 0.5)
+      tp = fp = tn = fn = 0
+      predictions.zip(actuals).each do |pred, actual|
+        pred_bin = pred > threshold ? 1 : 0
+        case [pred_bin, actual]
+        when [1, 1] then tp += 1
+        when [1, 0] then fp += 1
+        when [0, 0] then tn += 1
+        when [0, 1] then fn += 1
+        end
+      end
+      { tp: tp, fp: fp, tn: tn, fn: fn }
+    end
+
+    # Area Under the ROC Curve
+    def self.auc_roc(predictions, actuals)
+      pairs = predictions.zip(actuals).sort_by { |pred, _| -pred }
+      positives = actuals.count(1)
+      negatives = actuals.count(0)
+      return 0.5 if positives == 0 || negatives == 0
+
+      auc = 0.0
+      fp = 0
+      pairs.each do |_, actual|
+        if actual == 1
+          auc += fp
+        else
+          fp += 1
+        end
+      end
+      auc / (positives * negatives).to_f
+    end
+
+    # Accuracy
+    def self.accuracy(predictions, actuals, threshold = 0.5)
+      correct = predictions.zip(actuals).count { |pred, actual| (pred > threshold ? 1 : 0) == actual }
+      correct.to_f / predictions.size
+    end
+  end
+end
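A quick sketch of how the GRYDRA::Metrics helpers above compose; the scores and labels are invented for illustration:

# Hypothetical classifier scores and 0/1 ground-truth labels.
scores = [0.92, 0.35, 0.80, 0.10]
labels = [1, 0, 1, 0]

cm   = GRYDRA::Metrics.confusion_matrix(scores, labels)   # => { tp: .., fp: .., tn: .., fn: .. } at threshold 0.5
prec = GRYDRA::Metrics.precision(cm[:tp], cm[:fp])
rec  = GRYDRA::Metrics.recall(cm[:tp], cm[:fn])
GRYDRA::Metrics.f1(prec, rec)                              # harmonic mean of precision and recall
GRYDRA::Metrics.auc_roc(scores, labels)                    # rank-based AUC estimate
GRYDRA::Metrics.accuracy(scores, labels)                   # fraction correct at threshold 0.5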

data/lib/grydra/networks/easy_network.rb

@@ -0,0 +1,161 @@
+module GRYDRA
+  module Networks
+    # Easy Network - Simplified interface for quick training
+    class EasyNetwork
+      attr_accessor :network, :vocabulary, :max_values, :max_values_output
+
+      def initialize(print_epochs: false, plot: false, verbose: true)
+        @network = MainNetwork.new(print_epochs: print_epochs, plot: plot)
+        @vocabulary = nil
+        @max_values = {}
+        @max_values_output = {}
+        @verbose = verbose
+      end
+
+      def configure_adam_optimizer(alpha = 0.001, beta1 = 0.9, beta2 = 0.999)
+        @network.subnets.each { |subnet| subnet.use_adam_optimizer(alpha, beta1, beta2) }
+      end
+
+      def evaluate_model(data_test_x, data_test_y, metrics = %i[mse mae], normalization = :max)
+        predictions = predict_numerical(data_test_x, normalization)
+        results = {}
+
+        # Handle both single and multi-output cases
+        predictions_flat = predictions.flatten
+        actuals_flat = data_test_y.flatten
+
+        # Ensure we have valid data
+        return results if predictions_flat.empty? || actuals_flat.empty?
+        return results if predictions_flat.any?(&:nil?) || actuals_flat.any?(&:nil?)
+
+        metrics.each do |metric|
+          case metric
+          when :mse
+            results[:mse] = Metrics.mse(predictions_flat, actuals_flat)
+          when :mae
+            results[:mae] = Metrics.mae(predictions_flat, actuals_flat)
+          when :accuracy
+            results[:accuracy] = Metrics.accuracy(predictions_flat, actuals_flat)
+          when :confusion_matrix
+            results[:confusion_matrix] = Metrics.confusion_matrix(predictions_flat, actuals_flat)
+          when :auc_roc
+            results[:auc_roc] = Metrics.auc_roc(predictions_flat, actuals_flat)
+          end
+        end
+        results
+      end
+
+      # Train with hash data
+      def train_hashes(data_hash, input_keys, label_key, structures, rate, epochs, normalization = :max, **opts)
+        @network.subnets.clear
+
+        inputs = data_hash.map do |item|
+          input_keys.map do |key|
+            value = item[key]
+            value == true ? 1.0 : (value == false ? 0.0 : value.to_f)
+          end
+        end
+
+        @max_values = Normalization.calculate_max_values(inputs, normalization)
+        data_normalized = Normalization.normalize_multiple(inputs, @max_values, normalization)
+
+        labels = data_hash.map { |item| [item[label_key].to_f] }
+        @max_values_output = Normalization.calculate_max_values(labels, normalization)
+        labels_no = Normalization.normalize_multiple(labels, @max_values_output, normalization)
+
+        structures.each do |structure|
+          @network.add_subnet([input_keys.size, *structure])
+        end
+
+        data_for_subnets = structures.map { |_| { input: data_normalized, output: labels_no } }
+        @network.train_subnets(data_for_subnets, rate, epochs, **opts)
+      end
+
+      def predict_hashes(new_hashes, input_keys, normalization = :max)
+        inputs = new_hashes.map do |item|
+          input_keys.map do |key|
+            value = item[key]
+            value == true ? 1.0 : (value == false ? 0.0 : value.to_f)
+          end
+        end
+
+        data_normalized = Normalization.normalize_multiple(inputs, @max_values, normalization)
+        denormalize_predictions(data_normalized, normalization)
+      end
+
+      # Train with numerical data
+      def train_numerical(data_input, data_output, structures, rate, epochs, normalization = :max, **opts)
+        @network.subnets.clear
+
+        @max_values = Normalization.calculate_max_values(data_input, normalization)
+        @max_values_output = Normalization.calculate_max_values(data_output, normalization)
+
+        data_input_no = Normalization.normalize_multiple(data_input, @max_values, normalization)
+        data_output_no = Normalization.normalize_multiple(data_output, @max_values_output, normalization)
+
+        structures.each do |structure|
+          @network.add_subnet([data_input.first.size, *structure])
+        end
+
+        data_for_subnets = structures.map { |_| { input: data_input_no, output: data_output_no } }
+        @network.train_subnets(data_for_subnets, rate, epochs, **opts)
+      end
+
+      def predict_numerical(new_data, normalization = :max)
+        data_normalized = Normalization.normalize_multiple(new_data, @max_values, normalization)
+        denormalize_predictions(data_normalized, normalization)
+      end
+
+      # Train with text data
+      def train_text(texts, labels, structures, rate, epochs, normalization = :max, **opts)
+        @network.subnets.clear
+        @vocabulary = Preprocessing::Text.create_vocabulary(texts)
+
+        inputs = texts.map { |text| Preprocessing::Text.vectorize_text(text, @vocabulary) }
+
+        # For text, use simple max normalization
+        max_val = @vocabulary.size.to_f
+        data_normalized = inputs.map { |input| input.map { |v| v / max_val } }
+        @max_values = { 0 => max_val }
+
+        @max_values_output = Normalization.calculate_max_values(labels, normalization)
+        labels_no = Normalization.normalize_multiple(labels, @max_values_output, normalization)
+
+        structures.each do |structure|
+          @network.add_subnet([@vocabulary.size, *structure])
+        end
+
+        data_for_subnets = structures.map { |_| { input: data_normalized, output: labels_no } }
+        @network.train_subnets(data_for_subnets, rate, epochs, **opts)
+      end
+
+      def predict_text(new_texts, normalization = :max)
+        inputs = new_texts.map { |text| Preprocessing::Text.vectorize_text(text, @vocabulary) }
+
+        # For text, we use a simple max normalization based on vocabulary size
+        max_val = @vocabulary.size.to_f
+        data_normalized = inputs.map { |input| input.map { |v| v / max_val } }
+
+        denormalize_predictions(data_normalized, normalization)
+      end
+
+      private
+
+      def denormalize_predictions(data_normalized, normalization)
+        data_normalized.map do |input|
+          pred_norm = @network.combine_results(input)
+          if normalization == :zscore && @max_values_output.is_a?(Hash) && @max_values_output.key?(:std_devs)
+            pred_norm.map.with_index do |val, idx|
+              val * @max_values_output[:std_devs][idx] + @max_values_output[:means][idx]
+            end
+          else
+            pred_norm.map.with_index do |val, idx|
+              max_val = @max_values_output[idx] || @max_values_output.values.first || 1.0
+              val * max_val
+            end
+          end
+        end
+      end
+    end
+  end
+end
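A rough usage sketch for EasyNetwork based on the methods above; the XOR-style toy data, the [[4, 1]] subnet structure and the hyperparameters are illustrative assumptions, and the Normalization helpers it delegates to live in other files of this release:

# Hypothetical toy dataset (inputs already numeric).
x = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
y = [[0.0], [1.0], [1.0], [0.0]]

net = GRYDRA::Networks::EasyNetwork.new(print_epochs: true)
net.train_numerical(x, y, [[4, 1]], 0.1, 500)   # one subnet: 2 inputs -> 4 hidden -> 1 output
net.predict_numerical([[1.0, 0.0]])             # normalizes with the stored max values, denormalizes the output
net.evaluate_model(x, y, %i[mse mae])           # => { mse: ..., mae: ... }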

data/lib/grydra/networks/main_network.rb

@@ -0,0 +1,38 @@
+module GRYDRA
+  module Networks
+    # Main Network with multiple subnets
+    class MainNetwork
+      attr_accessor :subnets
+
+      def initialize(print_epochs: false, plot: false)
+        @subnets = []
+        @print_epochs = print_epochs
+        @plot = plot
+      end
+
+      def add_subnet(structure, activations = nil)
+        @subnets << NeuralNetwork.new(structure, print_epochs: @print_epochs, plot: @plot, activations: activations)
+      end
+
+      def train_subnets(data, learning_rate, epochs, **opts)
+        data.each_with_index do |data_subnet, index|
+          puts "Training Subnet #{index + 1}..."
+          @subnets[index].train(data_subnet[:input], data_subnet[:output], learning_rate, epochs, **opts)
+        end
+      end
+
+      def combine_results(input_main)
+        outputs_subnets = @subnets.map { |subnet| subnet.calculate_outputs(input_main) }
+        outputs_subnets.transpose.map { |outputs| outputs.sum / outputs.size }
+      end
+
+      def combine_results_weighted(input_main, weights = nil)
+        outputs_subnets = @subnets.map { |subnet| subnet.calculate_outputs(input_main) }
+        weights ||= Array.new(@subnets.size, 1.0 / @subnets.size)
+        outputs_subnets.transpose.map do |outputs|
+          outputs.zip(weights).map { |output, weight| output * weight }.sum
+        end
+      end
+    end
+  end
+end
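MainNetwork is the ensemble container that EasyNetwork wraps; a small sketch of using it directly (layer sizes, activations and the weight vector are illustrative):

inputs  = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
targets = [[0.0], [1.0], [1.0], [0.0]]

main = GRYDRA::Networks::MainNetwork.new(print_epochs: false)
main.add_subnet([2, 4, 1])                    # default :tanh activations per layer
main.add_subnet([2, 8, 1], %i[relu tanh])     # explicit per-layer activations

data = Array.new(main.subnets.size) { { input: inputs, output: targets } }
main.train_subnets(data, 0.1, 200)

main.combine_results([1.0, 0.0])                       # plain average of the subnet outputs
main.combine_results_weighted([1.0, 0.0], [0.7, 0.3])  # weighted ensemble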

data/lib/grydra/networks/neural_network.rb

@@ -0,0 +1,203 @@
+module GRYDRA
+  module Networks
+    # Neural Network class
+    class NeuralNetwork
+      attr_accessor :layers, :history_error
+
+      def initialize(structure, print_epochs: false, plot: false, activations: nil)
+        @print_epochs = print_epochs
+        @plot = plot
+        @layers = []
+        @history_error = []
+        @optimizer = nil
+
+        activations ||= Array.new(structure.size - 1, :tanh)
+
+        structure.each_cons(2).with_index do |(inputs, outputs), i|
+          @layers << Layers::Dense.new(outputs, inputs, activations[i])
+        end
+      end
+
+      def use_adam_optimizer(alpha = 0.001, beta1 = 0.9, beta2 = 0.999)
+        @optimizer = Optimizers::Adam.new(alpha: alpha, beta1: beta1, beta2: beta2)
+      end
+
+      def calculate_outputs(inputs, apply_dropout = false, dropout_rate = 0.5)
+        unless inputs.is_a?(Array) && inputs.all? { |e| e.is_a?(Numeric) }
+          raise ArgumentError, 'Inputs must be an array of numbers'
+        end
+
+        @layers.inject(inputs) { |outputs, layer| layer.calculate_outputs(outputs, apply_dropout, dropout_rate) }
+      end
+
+      # Training with mini-batch, early stopping, decay learning rate, regularization and validation
+      def train(data_input, data_output, learning_rate, epochs,
+                error_threshold: nil, batch_size: 1, patience: 50, decay: 0.95,
+                lambda_l1: 0, lambda_l2: 0, dropout: false, dropout_rate: 0.5,
+                validation_split: 0.0)
+        best_error = Float::INFINITY
+        patience_counter = 0
+
+        # Split validation data if requested
+        if validation_split > 0
+          split_idx = (data_input.size * (1 - validation_split)).to_i
+          indices = (0...data_input.size).to_a.shuffle
+          train_indices = indices[0...split_idx]
+          val_indices = indices[split_idx..-1]
+
+          train_input = train_indices.map { |i| data_input[i] }
+          train_output = train_indices.map { |i| data_output[i] }
+          val_input = val_indices.map { |i| data_input[i] }
+          val_output = val_indices.map { |i| data_output[i] }
+        else
+          train_input = data_input
+          train_output = data_output
+          val_input = nil
+          val_output = nil
+        end
+
+        epochs.times do |epoch|
+          error_total = 0
+          error_regularization = 0
+
+          # Shuffle training data
+          indices = (0...train_input.size).to_a.shuffle
+          train_input = indices.map { |i| train_input[i] }
+          train_output = indices.map { |i| train_output[i] }
+
+          data_input.each_slice(batch_size).with_index do |batch_inputs, batch_idx|
+            batch_outputs_real = data_output[batch_idx * batch_size, batch_size]
+
+            batch_inputs.zip(batch_outputs_real).each do |input, output_real|
+              outputs = calculate_outputs(input, dropout, dropout_rate)
+              errors = outputs.zip(output_real).map { |output, real| real - output }
+              error_total += errors.map { |e| e**2 }.sum / errors.size
+
+              # Calculate regularization penalty
+              if lambda_l1 > 0 || lambda_l2 > 0
+                @layers.each do |layer|
+                  layer.neurons.each do |neuron|
+                    error_regularization += Regularization.l1_regularization(neuron.weights, lambda_l1) if lambda_l1 > 0
+                    error_regularization += Regularization.l2_regularization(neuron.weights, lambda_l2) if lambda_l2 > 0
+                  end
+                end
+              end
+
+              # Backpropagation
+              backpropagate(input, errors, learning_rate, lambda_l1, lambda_l2)
+            end
+          end
+
+          error_total += error_regularization
+
+          # Early stopping check
+          if error_threshold && error_total < error_threshold
+            puts "Error threshold reached at epoch #{epoch + 1}: #{error_total}"
+            break
+          end
+
+          if error_total < best_error
+            best_error = error_total
+            patience_counter = 0
+          else
+            patience_counter += 1
+            if patience_counter >= patience
+              puts "Early stopping at epoch #{epoch + 1}, error has not improved for #{patience} epochs."
+              break
+            end
+          end
+
+          learning_rate *= decay
+          @history_error << error_total if @plot
+
+          if @print_epochs
+            puts "Epoch #{epoch + 1}, Total Error: #{error_total.round(6)}, learning rate: #{learning_rate.round(6)}"
+          end
+        end
+
+        Utils::Visualization.plot_error(@history_error) if @plot
+      end
+
+      def info_network
+        puts "Neural network with #{@layers.size} layers:"
+        @layers.each_with_index do |layer, i|
+          puts " Layer #{i + 1}: #{layer.neurons.size} neurons, activation: #{layer.activation}"
+          layer.neurons.each_with_index do |neuron, j|
+            puts " Neuron #{j + 1}: Weights=#{neuron.weights.map { |p| p.round(3) }}, Bias=#{neuron.bias.round(3)}"
+          end
+        end
+      end
+
+      # Export to DOT for Graphviz
+      def export_graphviz(filename = 'neural_network.dot')
+        File.open(filename, 'w') do |f|
+          f.puts 'digraph NeuralNetwork {'
+          @layers.each_with_index do |layer, i|
+            layer.neurons.each_with_index do |_neuron, j|
+              node = "L#{i}_N#{j}"
+              f.puts " #{node} [label=\"N#{j + 1}\"];"
+              next unless i < @layers.size - 1
+
+              @layers[i + 1].neurons.each_with_index do |next_neuron, k|
+                weight = next_neuron.weights[j].round(3)
+                f.puts " #{node} -> L#{i + 1}_N#{k} [label=\"#{weight}\"];"
+              end
+            end
+          end
+          f.puts '}'
+        end
+        puts "Network exported to #{filename} (Graphviz DOT)"
+      end
+
+      private
+
+      def backpropagate(input, errors, learning_rate, lambda_l1, lambda_l2)
+        # Output layer
+        @layers.last.neurons.each_with_index do |neuron, i|
+          neuron.delta = errors[i] * neuron.derivative_activation
+        end
+
+        # Hidden layers
+        (@layers.size - 2).downto(0) do |i|
+          @layers[i].neurons.each_with_index do |neuron, j|
+            sum_deltas = @layers[i + 1].neurons.sum { |n| n.weights[j] * n.delta }
+            neuron.delta = sum_deltas * neuron.derivative_activation
+          end
+        end
+
+        # Update weights and biases
+        @layers.each_with_index do |layer, idx|
+          inputs_layer = idx.zero? ? input : @layers[idx - 1].neurons.map(&:output)
+          layer.neurons.each_with_index do |neuron, neuron_idx|
+            neuron.weights.each_with_index do |_weight, i|
+              gradient = neuron.delta * inputs_layer[i]
+
+              if @optimizer
+                param_id = "layer_#{idx}_neuron_#{neuron_idx}_weight_#{i}"
+                update = @optimizer.update(param_id, gradient)
+                neuron.weights[i] += update
+              else
+                neuron.weights[i] += learning_rate * gradient
+              end
+
+              # Apply regularization
+              if lambda_l1 > 0
+                neuron.weights[i] -= learning_rate * lambda_l1 * (neuron.weights[i] > 0 ? 1 : -1)
+              end
+              neuron.weights[i] -= learning_rate * lambda_l2 * 2 * neuron.weights[i] if lambda_l2 > 0
+            end
+
+            # Update bias
+            if @optimizer
+              param_id = "layer_#{idx}_neuron_#{neuron_idx}_bias"
+              update = @optimizer.update(param_id, neuron.delta)
+              neuron.bias += update
+            else
+              neuron.bias += learning_rate * neuron.delta
+            end
+          end
+        end
+      end
+    end
+  end
+end
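The NeuralNetwork class can also be driven directly; a hedged sketch exercising the training options declared above (all data, sizes and hyperparameters are made up, and Layers::Dense / Optimizers::Adam come from other files in this release):

inputs  = [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]]
targets = [[0.0], [1.0], [1.0], [0.0]]

nn = GRYDRA::Networks::NeuralNetwork.new([2, 6, 1], print_epochs: true, activations: %i[tanh sigmoid])
nn.use_adam_optimizer(0.001)                 # switch weight updates from plain SGD to Adam
nn.train(inputs, targets, 0.1, 1_000,
         batch_size: 2, patience: 100, decay: 0.98,
         lambda_l2: 0.001, validation_split: 0.25)
nn.calculate_outputs([1.0, 0.0])             # forward pass only
nn.info_network                              # prints layers, weights and biases
nn.export_graphviz('xor.dot')                # writes a Graphviz DOT file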

data/lib/grydra/networks/neuron.rb

@@ -0,0 +1,80 @@
+module GRYDRA
+  module Networks
+    # Individual Neuron class
+    class Neuron
+      attr_accessor :weights, :bias, :output, :delta
+
+      def initialize(inputs, activation = :tanh)
+        unless inputs.is_a?(Integer) && inputs > 0
+          raise ArgumentError, 'Number of inputs must be a positive integer'
+        end
+
+        @weights = case activation
+                   when :relu, :leaky_relu
+                     Array.new(inputs) { Initializers.he_init(inputs) }
+                   else
+                     Array.new(inputs) { Initializers.xavier_init(inputs) }
+                   end
+
+        @bias = case activation
+                when :relu, :leaky_relu
+                  Initializers.he_init(inputs)
+                else
+                  Initializers.xavier_init(inputs)
+                end
+
+        @output = 0
+        @delta = 0
+        @activation = activation
+        @sum = 0
+        @dropout_mask = nil
+      end
+
+      def calculate_output(inputs, apply_dropout = false, dropout_rate = 0.5)
+        unless inputs.is_a?(Array) && inputs.all? { |e| e.is_a?(Numeric) }
+          raise ArgumentError, 'Inputs must be an array of numbers'
+        end
+
+        if @weights.size != inputs.size
+          raise ArgumentError, "Error: inputs (#{inputs.size}) do not match weights (#{@weights.size})"
+        end
+
+        @sum = @weights.zip(inputs).map { |weight, input| weight * input }.sum + @bias
+        @output = apply_activation(@sum)
+
+        if apply_dropout
+          @dropout_mask = rand < dropout_rate ? 0 : 1 / (1 - dropout_rate)
+          @output *= @dropout_mask
+        end
+
+        @output
+      end
+
+      def derivative_activation
+        case @activation
+        when :tanh then Activations.derivative_tanh(@output)
+        when :relu then Activations.derivative_relu(@output)
+        when :sigmoid then Activations.derivative_sigmoid(@sum)
+        when :leaky_relu then Activations.derivative_leaky_relu(@sum)
+        when :swish then Activations.derivative_swish(@sum)
+        when :gelu then Activations.derivative_gelu(@sum)
+        else 1
+        end
+      end
+
+      private
+
+      def apply_activation(value)
+        case @activation
+        when :tanh then Activations.tanh(value)
+        when :relu then Activations.relu(value)
+        when :sigmoid then Activations.sigmoid(value)
+        when :leaky_relu then Activations.leaky_relu(value)
+        when :swish then Activations.swish(value)
+        when :gelu then Activations.gelu(value)
+        else value
+        end
+      end
+    end
+  end
+end
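Finally, a small sketch of the Neuron primitive in isolation (input values and the dropout rate are illustrative; the Initializers and Activations modules are defined elsewhere in the gem):

neuron = GRYDRA::Networks::Neuron.new(3, :relu)        # 3 inputs, He-initialized weights for ReLU
neuron.calculate_output([0.5, -0.2, 1.0])              # weighted sum + bias, passed through ReLU
neuron.calculate_output([0.5, -0.2, 1.0], true, 0.3)   # same forward pass with inverted dropout applied
neuron.derivative_activation                           # activation derivative at the last output, used in backprop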