grydra 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENCE +34 -0
- data/README.md +1381 -80
- data/lib/grydra/activations.rb +70 -0
- data/lib/grydra/callbacks.rb +212 -0
- data/lib/grydra/documentation.rb +84 -0
- data/lib/grydra/initializers.rb +14 -0
- data/lib/grydra/layers/base.rb +10 -0
- data/lib/grydra/layers/conv.rb +106 -0
- data/lib/grydra/layers/dense.rb +17 -0
- data/lib/grydra/layers/lstm.rb +139 -0
- data/lib/grydra/losses.rb +119 -0
- data/lib/grydra/metrics.rb +75 -0
- data/lib/grydra/networks/easy_network.rb +161 -0
- data/lib/grydra/networks/main_network.rb +38 -0
- data/lib/grydra/networks/neural_network.rb +203 -0
- data/lib/grydra/networks/neuron.rb +80 -0
- data/lib/grydra/normalization.rb +77 -0
- data/lib/grydra/optimizers.rb +162 -0
- data/lib/grydra/preprocessing/data.rb +48 -0
- data/lib/grydra/preprocessing/pca.rb +132 -0
- data/lib/grydra/preprocessing/text.rb +62 -0
- data/lib/grydra/regularization.rb +19 -0
- data/lib/grydra/training/cross_validation.rb +35 -0
- data/lib/grydra/training/hyperparameter_search.rb +46 -0
- data/lib/grydra/utils/examples.rb +183 -0
- data/lib/grydra/utils/persistence.rb +94 -0
- data/lib/grydra/utils/visualization.rb +105 -0
- data/lib/grydra/version.rb +3 -0
- data/lib/grydra.rb +162 -2
- metadata +96 -17
- data/lib/gr/core.rb +0 -1926
- data/lib/gr/version.rb +0 -3

data/lib/grydra/activations.rb
@@ -0,0 +1,70 @@
+module GRYDRA
+  module Activations
+    # Hyperbolic tangent activation function
+    def self.tanh(x)
+      Math.tanh(x)
+    end
+
+    def self.derivative_tanh(x)
+      1 - tanh(x)**2
+    end
+
+    # Rectified Linear Unit
+    def self.relu(x)
+      x > 0 ? x : 0
+    end
+
+    def self.derivative_relu(x)
+      x > 0 ? 1 : 0
+    end
+
+    # Sigmoid activation function
+    def self.sigmoid(x)
+      1.0 / (1.0 + Math.exp(-x))
+    end
+
+    def self.derivative_sigmoid(x)
+      s = sigmoid(x)
+      s * (1 - s)
+    end
+
+    # Softmax for multi-class classification
+    def self.softmax(vector)
+      max = vector.max
+      exps = vector.map { |v| Math.exp(v - max) }
+      sum = exps.sum
+      exps.map { |v| v / sum }
+    end
+
+    # Leaky ReLU
+    def self.leaky_relu(x, alpha = 0.01)
+      x > 0 ? x : alpha * x
+    end
+
+    def self.derivative_leaky_relu(x, alpha = 0.01)
+      x > 0 ? 1 : alpha
+    end
+
+    # Swish activation function
+    def self.swish(x)
+      x * sigmoid(x)
+    end
+
+    def self.derivative_swish(x)
+      s = sigmoid(x)
+      s + x * s * (1 - s)
+    end
+
+    # GELU (Gaussian Error Linear Unit)
+    def self.gelu(x)
+      0.5 * x * (1 + Math.tanh(Math.sqrt(2 / Math::PI) * (x + 0.044715 * x**3)))
+    end
+
+    def self.derivative_gelu(x)
+      tanh_arg = Math.sqrt(2 / Math::PI) * (x + 0.044715 * x**3)
+      tanh_val = Math.tanh(tanh_arg)
+      sech2 = 1 - tanh_val**2
+      0.5 * (1 + tanh_val) + 0.5 * x * sech2 * Math.sqrt(2 / Math::PI) * (1 + 3 * 0.044715 * x**2)
+    end
+  end
+end
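
For orientation, a minimal usage sketch of the new Activations module (assuming grydra 2.0.1 is installed; input values are illustrative):

    require 'grydra'

    # Scalar activations take and return single numbers.
    GRYDRA::Activations.relu(-0.3)     # => 0
    GRYDRA::Activations.sigmoid(0.0)   # => 0.5
    GRYDRA::Activations.gelu(1.0)      # => ~0.8412

    # Softmax takes an array and returns probabilities that sum to 1.
    GRYDRA::Activations.softmax([1.0, 2.0, 3.0])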

data/lib/grydra/callbacks.rb
@@ -0,0 +1,212 @@
+module GRYDRA
+  module Callbacks
+    # Base Callback class
+    class Base
+      def on_train_begin(logs = {}); end
+      def on_train_end(logs = {}); end
+      def on_epoch_begin(epoch, logs = {}); end
+      def on_epoch_end(epoch, logs = {}); end
+      def on_batch_begin(batch, logs = {}); end
+      def on_batch_end(batch, logs = {}); end
+    end
+
+    # Early Stopping Callback
+    class EarlyStopping < Base
+      attr_reader :stopped_epoch
+
+      def initialize(monitor: :loss, patience: 10, min_delta: 0.0, mode: :min, restore_best: true)
+        @monitor = monitor
+        @patience = patience
+        @min_delta = min_delta
+        @mode = mode
+        @restore_best = restore_best
+        @wait = 0
+        @stopped_epoch = 0
+        @best_value = mode == :min ? Float::INFINITY : -Float::INFINITY
+        @best_weights = nil
+      end
+
+      def on_epoch_end(epoch, logs = {})
+        current = logs[@monitor]
+        return unless current
+
+        if improved?(current)
+          @best_value = current
+          @wait = 0
+          @best_weights = logs[:weights].dup if @restore_best && logs[:weights]
+        else
+          @wait += 1
+          if @wait >= @patience
+            @stopped_epoch = epoch
+            logs[:stop_training] = true
+            logs[:weights] = @best_weights if @restore_best && @best_weights
+            puts "Early stopping at epoch #{epoch + 1}"
+          end
+        end
+      end
+
+      private
+
+      def improved?(current)
+        if @mode == :min
+          current < @best_value - @min_delta
+        else
+          current > @best_value + @min_delta
+        end
+      end
+    end
+
+    # Learning Rate Scheduler
+    class LearningRateScheduler < Base
+      def initialize(schedule: nil, &block)
+        @schedule = schedule || block
+      end
+
+      def on_epoch_begin(epoch, logs = {})
+        new_lr = @schedule.call(epoch, logs[:learning_rate])
+        logs[:learning_rate] = new_lr if new_lr
+      end
+    end
+
+    # Reduce Learning Rate on Plateau
+    class ReduceLROnPlateau < Base
+      def initialize(monitor: :loss, factor: 0.5, patience: 5, min_lr: 1e-7, mode: :min)
+        @monitor = monitor
+        @factor = factor
+        @patience = patience
+        @min_lr = min_lr
+        @mode = mode
+        @wait = 0
+        @best_value = mode == :min ? Float::INFINITY : -Float::INFINITY
+      end
+
+      def on_epoch_end(epoch, logs = {})
+        current = logs[@monitor]
+        return unless current
+
+        if improved?(current)
+          @best_value = current
+          @wait = 0
+        else
+          @wait += 1
+          if @wait >= @patience
+            old_lr = logs[:learning_rate]
+            new_lr = [old_lr * @factor, @min_lr].max
+            if new_lr < old_lr
+              logs[:learning_rate] = new_lr
+              puts "Reducing learning rate to #{new_lr.round(8)}"
+            end
+            @wait = 0
+          end
+        end
+      end
+
+      private
+
+      def improved?(current)
+        if @mode == :min
+          current < @best_value
+        else
+          current > @best_value
+        end
+      end
+    end
+
+    # Model Checkpoint Callback
+    class ModelCheckpoint < Base
+      def initialize(filepath, monitor: :loss, save_best_only: true, mode: :min)
+        @filepath = filepath
+        @monitor = monitor
+        @save_best_only = save_best_only
+        @mode = mode
+        @best_value = mode == :min ? Float::INFINITY : -Float::INFINITY
+      end
+
+      def on_epoch_end(epoch, logs = {})
+        current = logs[@monitor]
+
+        if @save_best_only
+          if improved?(current)
+            @best_value = current
+            save_model(logs[:model], epoch)
+          end
+        else
+          save_model(logs[:model], epoch)
+        end
+      end
+
+      private
+
+      def improved?(current)
+        if @mode == :min
+          current < @best_value
+        else
+          current > @best_value
+        end
+      end
+
+      def save_model(model, epoch)
+        filepath = @filepath.gsub('{epoch}', epoch.to_s)
+        Utils::Persistence.save_model(model, filepath)
+      end
+    end
+
+    # CSV Logger Callback
+    class CSVLogger < Base
+      def initialize(filename, separator: ',', append: false)
+        @filename = filename
+        @separator = separator
+        @append = append
+        @file = nil
+        @keys = nil
+      end
+
+      def on_train_begin(logs = {})
+        mode = @append ? 'a' : 'w'
+        @file = File.open(@filename, mode)
+      end
+
+      def on_epoch_end(epoch, logs = {})
+        if @keys.nil?
+          @keys = ['epoch'] + logs.keys.map(&:to_s).sort
+          @file.puts @keys.join(@separator) unless @append
+        end
+
+        values = [epoch] + @keys[1..-1].map { |k| logs[k.to_sym] || '' }
+        @file.puts values.join(@separator)
+        @file.flush
+      end
+
+      def on_train_end(logs = {})
+        @file.close if @file
+      end
+    end
+
+    # Progress Bar Callback
+    class ProgressBar < Base
+      def initialize(total_epochs)
+        @total_epochs = total_epochs
+        @start_time = nil
+      end
+
+      def on_train_begin(logs = {})
+        @start_time = Time.now
+      end
+
+      def on_epoch_end(epoch, logs = {})
+        progress = (epoch + 1).to_f / @total_epochs
+        bar_length = 30
+        filled = (bar_length * progress).to_i
+        bar = '=' * filled + '-' * (bar_length - filled)
+
+        elapsed = Time.now - @start_time
+        eta = elapsed / progress - elapsed
+
+        metrics = logs.map { |k, v| "#{k}: #{v.is_a?(Numeric) ? v.round(6) : v}" }.join(', ')
+
+        print "\rEpoch #{epoch + 1}/#{@total_epochs} [#{bar}] - #{elapsed.round(1)}s - ETA: #{eta.round(1)}s - #{metrics}"
+        puts if epoch + 1 == @total_epochs
+      end
+    end
+  end
+end
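
The diff does not show how the trainer wires these callbacks in, so the sketch below drives the hooks manually with an illustrative logs hash; the class names and keyword arguments come from the code above:

    require 'grydra'

    early_stop = GRYDRA::Callbacks::EarlyStopping.new(monitor: :loss, patience: 3)
    reduce_lr  = GRYDRA::Callbacks::ReduceLROnPlateau.new(factor: 0.5, patience: 2)
    callbacks  = [early_stop, reduce_lr]

    logs = { loss: 1.0, learning_rate: 0.01 }
    callbacks.each { |cb| cb.on_train_begin(logs) }

    10.times do |epoch|
      callbacks.each { |cb| cb.on_epoch_begin(epoch, logs) }
      logs[:loss] *= 0.99                    # stand-in for a real training step
      callbacks.each { |cb| cb.on_epoch_end(epoch, logs) }
      break if logs[:stop_training]
    end
    callbacks.each { |cb| cb.on_train_end(logs) }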

data/lib/grydra/documentation.rb
@@ -0,0 +1,84 @@
+module GRYDRA
+  module Documentation
+    METHOD_DESCRIPTIONS = {
+      # Networks
+      'Networks::MainNetwork.add_subnet' => {
+        description: 'Adds a subnet to the main network with the given structure.',
+        example: 'network = GRYDRA::Networks::MainNetwork.new; network.add_subnet([2, 4, 1])'
+      },
+      'Networks::EasyNetwork.train_numerical' => {
+        description: 'Trains the network with numerical data.',
+        example: 'network.train_numerical(data_input, data_output, [[4, 1]], 0.05, 15000, :max)'
+      },
+      'Networks::EasyNetwork.configure_adam_optimizer' => {
+        description: 'Configures the Adam optimizer for all subnets.',
+        example: 'network.configure_adam_optimizer(0.001, 0.9, 0.999)'
+      },
+
+      # Metrics
+      'Metrics.mse' => {
+        description: 'Calculates Mean Squared Error.',
+        example: 'GRYDRA::Metrics.mse(predictions, actuals)'
+      },
+      'Metrics.accuracy' => {
+        description: 'Calculates classification accuracy.',
+        example: 'GRYDRA::Metrics.accuracy(predictions, actuals, 0.5)'
+      },
+
+      # Persistence
+      'Utils::Persistence.save_model' => {
+        description: 'Saves the trained model to a binary file.',
+        example: 'GRYDRA::Utils::Persistence.save_model(model, "my_model", "./models")'
+      },
+      'Utils::Persistence.load_model' => {
+        description: 'Loads a saved model from a binary file.',
+        example: 'model = GRYDRA::Utils::Persistence.load_model("my_model", "./models")'
+      },
+
+      # Visualization
+      'Utils::Visualization.plot_architecture_ascii' => {
+        description: 'Displays an ASCII representation of the network architecture.',
+        example: 'GRYDRA::Utils::Visualization.plot_architecture_ascii(model)'
+      },
+      'Utils::Visualization.analyze_gradients' => {
+        description: 'Analyzes the model\'s gradients.',
+        example: 'analysis = GRYDRA::Utils::Visualization.analyze_gradients(model)'
+      },
+
+      # Training
+      'Training::CrossValidation.cross_validation' => {
+        description: 'Performs k-fold cross-validation.',
+        example: 'result = GRYDRA::Training::CrossValidation.cross_validation(data_x, data_y, 5) { |train_x, train_y, test_x, test_y| ... }'
+      },
+      'Training::HyperparameterSearch.hyperparameter_search' => {
+        description: 'Performs hyperparameter search using grid search.',
+        example: 'result = GRYDRA::Training::HyperparameterSearch.hyperparameter_search(data_x, data_y, param_grid) { |params, x, y| ... }'
+      }
+    }
+
+    def self.describe_method(class_name, method_name)
+      key = "#{class_name}.#{method_name}"
+      info = METHOD_DESCRIPTIONS[key]
+
+      if info
+        puts "\e[1;36m📖 Description of #{key}:\e[0m"
+        puts info[:description]
+        puts "\n💡 Example:"
+        puts "#{info[:example]}\e[0m"
+      else
+        puts "\e[31;1m✗ No description found for method '#{key}'\e[0m"
+        puts "\e[36m💡 You can call: GRYDRA::Documentation.list_methods_available\e[0m"
+      end
+    end
+
+    def self.list_methods_available
+      puts "\e[1;36m📚 Documented public methods:\e[0m"
+      grouped = METHOD_DESCRIPTIONS.keys.group_by { |k| k.split('.').first }
+      grouped.each do |class_name, methods|
+        puts " #{class_name}:"
+        methods.each { |m| puts " - #{m.split('.').last}" }
+      end
+      print "\e[0m"
+    end
+  end
+end
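
The Documentation module is self-describing; from an IRB session (with the gem installed) it can be explored like this:

    require 'grydra'

    GRYDRA::Documentation.list_methods_available
    GRYDRA::Documentation.describe_method('Metrics', 'mse')
    GRYDRA::Documentation.describe_method('Utils::Persistence', 'save_model')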

data/lib/grydra/initializers.rb
@@ -0,0 +1,14 @@
+module GRYDRA
+  module Initializers
+    # Xavier/Glorot initialization
+    def self.xavier_init(num_inputs)
+      limit = Math.sqrt(6.0 / num_inputs)
+      rand * 2 * limit - limit
+    end
+
+    # He initialization (for ReLU)
+    def self.he_init(num_inputs)
+      Math.sqrt(2.0 / num_inputs) * (rand * 2 - 1)
+    end
+  end
+end
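
Both initializers return one random weight per call, scaled by the fan-in; a quick sketch:

    require 'grydra'

    # Layers call these once per connection.
    w_xavier = GRYDRA::Initializers.xavier_init(64) # uniform in [-sqrt(6/64), sqrt(6/64)]
    w_he     = GRYDRA::Initializers.he_init(64)     # uniform in (-sqrt(2/64), sqrt(2/64))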

data/lib/grydra/layers/conv.rb
@@ -0,0 +1,106 @@
+module GRYDRA
+  module Layers
+    # Convolutional Layer - Full 2D implementation
+    class Conv < Base
+      attr_accessor :filters, :kernel_size, :stride, :padding, :weights, :biases
+
+      def initialize(filters, kernel_size, stride: 1, padding: 0, activation: :relu, input_channels: 1)
+        @filters = filters
+        @kernel_size = kernel_size
+        @stride = stride
+        @padding = padding
+        @activation = activation
+        @input_channels = input_channels
+
+        # Initialize weights using He initialization for ReLU
+        @weights = Array.new(filters) do
+          Array.new(input_channels) do
+            Array.new(kernel_size) do
+              Array.new(kernel_size) { Initializers.he_init(kernel_size * kernel_size) }
+            end
+          end
+        end
+        @biases = Array.new(filters, 0.0)
+        @output_cache = nil
+      end
+
+      def calculate_outputs(input)
+        # Input shape: [channels, height, width]
+        channels, height, width = input_shape(input)
+
+        # Apply padding if needed
+        padded_input = apply_padding(input) if @padding > 0
+        padded_input ||= input
+
+        # Calculate output dimensions
+        out_height = ((height + 2 * @padding - @kernel_size) / @stride) + 1
+        out_width = ((width + 2 * @padding - @kernel_size) / @stride) + 1
+
+        # Perform convolution
+        output = Array.new(@filters) do |f|
+          Array.new(out_height) do |i|
+            Array.new(out_width) do |j|
+              convolve_at_position(padded_input, f, i, j)
+            end
+          end
+        end
+
+        @output_cache = output
+        output
+      end
+
+      private
+
+      def input_shape(input)
+        if input.is_a?(Array) && input.first.is_a?(Array) && input.first.first.is_a?(Array)
+          [input.size, input.first.size, input.first.first.size]
+        else
+          # Flatten input assumed
+          [1, 1, input.size]
+        end
+      end
+
+      def apply_padding(input)
+        channels, height, width = input_shape(input)
+        padded = Array.new(channels) do |c|
+          Array.new(height + 2 * @padding) do |i|
+            Array.new(width + 2 * @padding) do |j|
+              if i < @padding || i >= height + @padding || j < @padding || j >= width + @padding
+                0.0
+              else
+                input[c][i - @padding][j - @padding]
+              end
+            end
+          end
+        end
+        padded
+      end
+
+      def convolve_at_position(input, filter_idx, out_i, out_j)
+        sum = @biases[filter_idx]
+
+        @input_channels.times do |c|
+          @kernel_size.times do |ki|
+            @kernel_size.times do |kj|
+              i = out_i * @stride + ki
+              j = out_j * @stride + kj
+              sum += input[c][i][j] * @weights[filter_idx][c][ki][kj]
+            end
+          end
+        end
+
+        apply_activation(sum)
+      end
+
+      def apply_activation(value)
+        case @activation
+        when :relu then Activations.relu(value)
+        when :tanh then Activations.tanh(value)
+        when :sigmoid then Activations.sigmoid(value)
+        when :leaky_relu then Activations.leaky_relu(value)
+        else value
+        end
+      end
+    end
+  end
+end
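
A minimal forward-pass sketch for the Conv layer; the 4x4 single-channel input is illustrative:

    require 'grydra'

    # One input channel, two 3x3 filters, stride 1, no padding.
    conv = GRYDRA::Layers::Conv.new(2, 3, stride: 1, padding: 0,
                                    activation: :relu, input_channels: 1)

    input = [Array.new(4) { Array.new(4) { rand } }] # shape [channels, height, width]
    feature_maps = conv.calculate_outputs(input)
    # => 2 feature maps, each 2x2, since (4 - 3) / 1 + 1 = 2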

data/lib/grydra/layers/dense.rb
@@ -0,0 +1,17 @@
+module GRYDRA
+  module Layers
+    # Dense (Fully Connected) Layer
+    class Dense < Base
+      attr_accessor :neurons, :activation
+
+      def initialize(num_neurons, inputs_per_neuron, activation = :tanh)
+        @activation = activation
+        @neurons = Array.new(num_neurons) { Networks::Neuron.new(inputs_per_neuron, activation) }
+      end
+
+      def calculate_outputs(inputs, apply_dropout = false, dropout_rate = 0.5)
+        @neurons.map { |neuron| neuron.calculate_output(inputs, apply_dropout, dropout_rate) }
+      end
+    end
+  end
+end
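
A forward-pass sketch for the Dense layer; it delegates to Networks::Neuron (added elsewhere in this release), so this assumes the whole gem is loaded:

    require 'grydra'

    # Three neurons, each with two inputs, tanh activation.
    layer = GRYDRA::Layers::Dense.new(3, 2, :tanh)
    layer.calculate_outputs([0.5, -0.2])       # => array of 3 activations
    layer.calculate_outputs([0.5, -0.2], true) # same, with dropout applied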

data/lib/grydra/layers/lstm.rb
@@ -0,0 +1,139 @@
+module GRYDRA
+  module Layers
+    # LSTM Layer - Full implementation with backpropagation support
+    class LSTM < Base
+      attr_accessor :units, :cell_state, :hidden_state
+      attr_reader :weights, :biases
+
+      def initialize(units, inputs_per_unit, return_sequences: false)
+        @units = units
+        @inputs_per_unit = inputs_per_unit
+        @return_sequences = return_sequences
+
+        combined_size = inputs_per_unit + units
+
+        # Initialize weight matrices for all gates
+        @weights = {
+          forget: initialize_weights(combined_size, units),
+          input: initialize_weights(combined_size, units),
+          candidate: initialize_weights(combined_size, units),
+          output: initialize_weights(combined_size, units)
+        }
+
+        @biases = {
+          forget: Array.new(units, 1.0), # Bias forget gate to 1 initially
+          input: Array.new(units, 0.0),
+          candidate: Array.new(units, 0.0),
+          output: Array.new(units, 0.0)
+        }
+
+        # Cache for backpropagation
+        @cache = []
+
+        reset_state
+      end
+
+      def reset_state
+        @cell_state = Array.new(@units, 0.0)
+        @hidden_state = Array.new(@units, 0.0)
+        @cache.clear
+      end
+
+      def calculate_outputs(input_sequence)
+        outputs = []
+
+        input_sequence = [input_sequence] unless input_sequence.first.is_a?(Array)
+
+        input_sequence.each do |input|
+          combined = input + @hidden_state
+
+          # Forget gate
+          f_t = gate_activation(combined, @weights[:forget], @biases[:forget], :sigmoid)
+
+          # Input gate
+          i_t = gate_activation(combined, @weights[:input], @biases[:input], :sigmoid)
+
+          # Candidate values
+          c_tilde = gate_activation(combined, @weights[:candidate], @biases[:candidate], :tanh)
+
+          # Update cell state
+          @cell_state = @cell_state.zip(f_t, i_t, c_tilde).map do |c, f, i, c_t|
+            f * c + i * c_t
+          end
+
+          # Output gate
+          o_t = gate_activation(combined, @weights[:output], @biases[:output], :sigmoid)
+
+          # Update hidden state
+          @hidden_state = o_t.zip(@cell_state).map { |o, c| o * Math.tanh(c) }
+
+          # Cache for backpropagation
+          @cache << {
+            input: input,
+            combined: combined,
+            f_t: f_t,
+            i_t: i_t,
+            c_tilde: c_tilde,
+            o_t: o_t,
+            cell_state: @cell_state.dup,
+            hidden_state: @hidden_state.dup
+          }
+
+          outputs << @hidden_state.dup
+        end
+
+        @return_sequences ? outputs : outputs.last
+      end
+
+      def backward(d_hidden, learning_rate: 0.01)
+        # Simplified backpropagation through time
+        d_cell = Array.new(@units, 0.0)
+
+        @cache.reverse.each do |cache|
+          # Gradient through output gate
+          d_o = d_hidden.zip(cache[:cell_state]).map { |dh, c| dh * Math.tanh(c) }
+          d_cell = d_hidden.zip(cache[:o_t], cache[:cell_state]).map do |dh, o, c|
+            d_cell_val = dh * o * (1 - Math.tanh(c)**2)
+            d_cell_val
+          end
+
+          # Update weights (simplified)
+          update_gate_weights(:output, cache[:combined], d_o, learning_rate)
+        end
+      end
+
+      private
+
+      def initialize_weights(input_size, output_size)
+        Array.new(output_size) do
+          Array.new(input_size) { Initializers.xavier_init(input_size) }
+        end
+      end
+
+      def gate_activation(input, weights, biases, activation)
+        output = Array.new(@units) do |i|
+          sum = biases[i]
+          input.each_with_index { |x, j| sum += x * weights[i][j] }
+          sum
+        end
+
+        output.map do |val|
+          case activation
+          when :sigmoid then Activations.sigmoid(val)
+          when :tanh then Activations.tanh(val)
+          else val
+          end
+        end
+      end
+
+      def update_gate_weights(gate, input, gradient, learning_rate)
+        gradient.each_with_index do |grad, i|
+          input.each_with_index do |inp, j|
+            @weights[gate][i][j] += learning_rate * grad * inp
+          end
+          @biases[gate][i] += learning_rate * grad
+        end
+      end
+    end
+  end
+end
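
Finally, a forward-pass sketch for the LSTM layer over a short illustrative sequence:

    require 'grydra'

    # 4 units, 3 features per timestep; return only the final hidden state.
    lstm = GRYDRA::Layers::LSTM.new(4, 3, return_sequences: false)

    sequence = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]] # 2 timesteps x 3 features
    last_hidden = lstm.calculate_outputs(sequence) # => array of 4 values

    lstm.reset_state # clear cell/hidden state before an unrelated sequence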