grydra 1.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/gr/core.rb DELETED
@@ -1,1926 +0,0 @@
1
- module GRYDRA
2
- require 'set'
3
# Hyperbolic tangent activation.
#
# @param x [Numeric] pre-activation value
# @return [Float] Math.tanh(x)
def self.tanh(x)
  activated = Math.tanh(x)
  activated
end
6
# Derivative of tanh evaluated at x: 1 - tanh(x)^2.
#
# @param x [Numeric] pre-activation value
# @return [Float]
def self.derivative_tanh(x)
  squashed = Math.tanh(x)
  1 - squashed**2
end
9
# Rectified linear unit.
#
# @param x [Numeric]
# @return [Numeric] x when x is positive, otherwise the Integer 0
def self.relu(x)
  return x if x > 0

  0
end
12
# Derivative of ReLU: 1 for positive x, 0 otherwise.
#
# @param x [Numeric]
# @return [Integer] 1 or 0
def self.derivative_relu(x)
  return 1 if x > 0

  0
end
15
# Logistic sigmoid: 1 / (1 + e^-x).
#
# @param x [Numeric]
# @return [Float]
def self.sigmoid(x)
  denominator = 1.0 + Math.exp(-x)
  1.0 / denominator
end
18
# Derivative of the logistic sigmoid at x: s * (1 - s), with s = sigmoid(x).
#
# @param x [Numeric] pre-activation value
# @return [Float]
def self.derivative_sigmoid(x)
  # Sigmoid is evaluated inline so the helper has no sibling dependency.
  activated = 1.0 / (1.0 + Math.exp(-x))
  activated * (1 - activated)
end
22
# Numerically stable softmax.
#
# The maximum is subtracted before exponentiating so large inputs do not
# overflow. An empty vector yields an empty result instead of failing on a
# nil maximum.
#
# @param vector [Array<Numeric>]
# @return [Array<Float>] probabilities summing to 1 (empty for empty input)
def self.softmax(vector)
  return [] if vector.empty?

  max = vector.max
  exps = vector.map { |v| Math.exp(v - max) }
  sum = exps.sum
  exps.map { |v| v / sum }
end
28
# Leaky ReLU: identity for positive x, alpha * x otherwise.
#
# @param x [Numeric]
# @param alpha [Numeric] slope used for non-positive inputs
# @return [Numeric]
def self.leaky_relu(x, alpha = 0.01)
  if x > 0
    x
  else
    alpha * x
  end
end
31
# Derivative of leaky ReLU: 1 for positive x, alpha otherwise.
#
# @param x [Numeric]
# @param alpha [Numeric] slope used for non-positive inputs
# @return [Numeric]
def self.derivative_leaky_relu(x, alpha = 0.01)
  if x > 0
    1
  else
    alpha
  end
end
34
# Swish activation: x * sigmoid(x).
#
# @param x [Numeric]
# @return [Float]
def self.swish(x)
  gate = 1.0 / (1.0 + Math.exp(-x))
  x * gate
end
37
# Derivative of swish at x: s + x * s * (1 - s), with s = sigmoid(x).
#
# @param x [Numeric]
# @return [Float]
def self.derivative_swish(x)
  gate = 1.0 / (1.0 + Math.exp(-x))
  gate + x * gate * (1 - gate)
end
41
# GELU activation using the tanh approximation.
#
# @param x [Numeric]
# @return [Float]
def self.gelu(x)
  scale = Math.sqrt(2 / Math::PI)
  inner = scale * (x + 0.044715 * x**3)
  0.5 * x * (1 + Math.tanh(inner))
end
44
# Derivative of the tanh-approximated GELU at x.
#
# @param x [Numeric]
# @return [Float]
def self.derivative_gelu(x)
  scale = Math.sqrt(2 / Math::PI)
  squashed = Math.tanh(scale * (x + 0.044715 * x**3))
  sech_squared = 1 - squashed**2
  # Derivative of the cubic polynomial inside the tanh.
  inner_slope = 1 + 3 * 0.044715 * x**2
  0.5 * (1 + squashed) + 0.5 * x * sech_squared * scale * inner_slope
end
50
# Inverted dropout over a vector of activations.
#
# Each element is independently zeroed with probability dropout_rate;
# survivors are divided by (1 - dropout_rate). Outside training the input
# is returned untouched.
#
# @param outputs [Array<Numeric>]
# @param dropout_rate [Numeric]
# @param training [Boolean]
# @return [Array<Numeric>]
def self.apply_dropout(outputs, dropout_rate = 0.5, training = true)
  return outputs unless training

  keep_probability = 1 - dropout_rate
  outputs.map do |activation|
    if rand < dropout_rate
      0
    else
      activation / keep_probability
    end
  end
end
54
# L1 penalty: lambda_l1 times the sum of absolute weights.
#
# @param weights [Array<Numeric>]
# @param lambda_l1 [Numeric]
# @return [Numeric]
def self.l1_regularization(weights, lambda_l1)
  magnitudes = weights.map(&:abs)
  lambda_l1 * magnitudes.sum
end
57
# L2 penalty: lambda_l2 times the sum of squared weights.
#
# @param weights [Array<Numeric>]
# @param lambda_l2 [Numeric]
# @return [Numeric]
def self.l2_regularization(weights, lambda_l2)
  squares = weights.map { |weight| weight**2 }
  lambda_l2 * squares.sum
end
60
# Draws one weight uniformly from [-limit, limit) with
# limit = sqrt(6 / num_inputs).
#
# @param num_inputs [Numeric] fan-in of the neuron
# @return [Float]
def self.xavier_init(num_inputs)
  bound = Math.sqrt(6.0 / num_inputs)
  sample = rand
  sample * 2 * bound - bound
end
64
# Draws one weight as sqrt(2 / num_inputs) times a uniform sample in [-1, 1).
#
# @param num_inputs [Numeric] fan-in of the neuron
# @return [Float]
def self.he_init(num_inputs)
  scale = Math.sqrt(2.0 / num_inputs)
  centered = rand * 2 - 1
  scale * centered
end
67
# Column-wise z-score normalization using the population standard deviation.
#
# Columns with zero deviation are mapped to 0.0, so the result is always
# Float. An empty data set returns three empty arrays instead of failing on
# `data.first`.
#
# @param data [Array<Array<Numeric>>] rows of equal length
# @return [Array(Array<Array<Float>>, Array<Float>, Array<Float>)]
#   normalized rows, column means, column standard deviations
def self.zscore_normalize(data)
  return [[], [], []] if data.empty?

  n = data.size
  columns = data.first.size
  means = Array.new(columns) { |i| data.sum { |row| row[i] }.to_f / n }
  std_devs = Array.new(columns) do |i|
    Math.sqrt(data.sum { |row| (row[i] - means[i])**2 }.to_f / n)
  end
  normalized = data.map do |row|
    row.each_with_index.map do |value, i|
      std_devs[i].zero? ? 0.0 : (value - means[i]) / std_devs[i]
    end
  end
  [normalized, means, std_devs]
end
79
# Reverses z-score normalization column by column.
#
# @param normalized [Array<Array<Numeric>>]
# @param means [Array<Numeric>] per-column means
# @param std_devs [Array<Numeric>] per-column standard deviations
# @return [Array<Array<Numeric>>] value * std_dev + mean for every cell
def self.zscore_denormalize(normalized, means, std_devs)
  normalized.map do |row|
    row.each_index.map do |column|
      row[column] * std_devs[column] + means[column]
    end
  end
end
84
# Rescales every value into [min_val, max_val] using the global minimum and
# maximum of the whole data set.
#
# The range is converted to Float so Integer data is no longer truncated by
# integer division (e.g. 5 in 0..10 maps to 0.5 rather than 0). Data with no
# values, or with all values equal, is returned unchanged.
#
# @param data [Array<Array<Numeric>>]
# @param min_val [Numeric] lower bound of the target interval
# @param max_val [Numeric] upper bound of the target interval
# @return [Array<Array<Numeric>>]
def self.min_max_normalize(data, min_val = 0, max_val = 1)
  values = data.flatten
  return data if values.empty?

  data_min, data_max = values.minmax
  range = (data_max - data_min).to_f
  return data if range.zero?

  span = max_val - min_val
  data.map do |row|
    row.map { |v| min_val + (v - data_min) * span / range }
  end
end
93
# Mean squared error between two equally long sequences.
#
# @param predictions [Array<Numeric>]
# @param actuals [Array<Numeric>]
# @return [Float]
def self.mse(predictions, actuals)
  squared_errors = predictions.each_index.map do |position|
    (predictions[position] - actuals[position])**2
  end
  squared_errors.sum / predictions.size.to_f
end
98
# Mean absolute error between two equally long sequences.
#
# @param predictions [Array<Numeric>]
# @param actuals [Array<Numeric>]
# @return [Float]
def self.mae(predictions, actuals)
  absolute_errors = predictions.each_index.map do |position|
    (predictions[position] - actuals[position]).abs
  end
  absolute_errors.sum / predictions.size.to_f
end
103
# Precision: tp / (tp + fp).
#
# Returns 0.0 when there are no positive predictions at all (tp + fp == 0)
# instead of NaN, so the value can be fed straight into f1 or averaged.
#
# @param tp [Numeric] true positives
# @param fp [Numeric] false positives
# @return [Float]
def self.precision(tp, fp)
  predicted_positive = tp + fp
  return 0.0 if predicted_positive.zero?

  tp.to_f / predicted_positive
end
106
# Recall: tp / (tp + fn).
#
# Returns 0.0 when there are no actual positives (tp + fn == 0) instead of
# NaN.
#
# @param tp [Numeric] true positives
# @param fn [Numeric] false negatives
# @return [Float]
def self.recall(tp, fn)
  actual_positive = tp + fn
  return 0.0 if actual_positive.zero?

  tp.to_f / actual_positive
end
109
# F1 score: harmonic mean of precision and recall.
#
# Returns 0.0 when both inputs are zero. Previously Float inputs gave NaN and
# Integer inputs raised ZeroDivisionError. The computation is done in Float
# so Integer arguments are not truncated.
#
# @param precision [Numeric]
# @param recall [Numeric]
# @return [Float]
def self.f1(precision, recall)
  total = precision + recall
  return 0.0 if total.zero?

  2.0 * precision * recall / total
end
112
# Counts binary classification outcomes.
#
# A prediction strictly above threshold counts as class 1, otherwise class 0.
# Pairs whose actual label equals neither 1 nor 0 are not counted.
#
# @param predictions [Array<Numeric>] scores
# @param actuals [Array] ground-truth labels (1 or 0)
# @param threshold [Numeric]
# @return [Hash{Symbol=>Integer}] keys :tp, :fp, :tn, :fn
def self.confusion_matrix(predictions, actuals, threshold = 0.5)
  counts = { tp: 0, fp: 0, tn: 0, fn: 0 }
  predictions.zip(actuals).each do |score, label|
    if score > threshold
      if 1 == label
        counts[:tp] += 1
      elsif 0 == label
        counts[:fp] += 1
      end
    elsif 0 == label
      counts[:tn] += 1
    elsif 1 == label
      counts[:fn] += 1
    end
  end
  counts
end
125
# Area under the ROC curve for binary labels (1 = positive, 0 = negative).
#
# AUC is the probability that a random positive scores higher than a random
# negative, with ties counted as one half. The previous implementation summed
# the negatives ranked ABOVE each positive, which is 1 - AUC: a perfect
# ranking scored 0.0.
#
# @param predictions [Array<Numeric>] scores, higher meaning more positive
# @param actuals [Array<Integer>] labels, 1 or 0
# @return [Float] AUC in [0, 1]; 0.5 when either class is absent
def self.auc_roc(predictions, actuals)
  positives = actuals.count(1)
  negatives = actuals.count(0)
  return 0.5 if positives == 0 || negatives == 0

  concordant = 0.0
  negatives_below = 0
  # Walk the distinct scores in ascending order. Every positive at a score
  # beats all negatives seen at lower scores and ties with the negatives that
  # share its score.
  predictions.zip(actuals).group_by(&:first).sort_by(&:first).each do |_score, group|
    positives_here = group.count { |_, actual| actual == 1 }
    negatives_here = group.count { |_, actual| actual == 0 }
    concordant += positives_here * (negatives_below + negatives_here / 2.0)
    negatives_below += negatives_here
  end
  concordant / (positives * negatives)
end
142
# Fraction of thresholded predictions that equal the actual label.
#
# @param predictions [Array<Numeric>] scores
# @param actuals [Array] labels (1 or 0)
# @param threshold [Numeric] scores strictly above it count as class 1
# @return [Float]
def self.accuracy(predictions, actuals, threshold = 0.5)
  hits = 0
  predictions.zip(actuals) do |score, label|
    predicted_class = score > threshold ? 1 : 0
    hits += 1 if predicted_class == label
  end
  hits.to_f / predictions.size
end
146
- ### ADVANCED OPTIMIZERS ###
147
# Adam optimizer that keeps moment estimates per parameter id.
#
# The bias-correction timestep is now tracked per parameter. Previously one
# shared counter was incremented on every call to #update, so with many
# parameters the counter raced ahead and every parameter after the first got
# the wrong correction on its first steps.
class AdamOptimizer
  # @param alpha [Numeric] step size
  # @param beta1 [Numeric] decay rate of the first-moment estimate
  # @param beta2 [Numeric] decay rate of the second-moment estimate
  # @param epsilon [Numeric] added to the denominator to avoid division by zero
  def initialize(alpha = 0.001, beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8)
    @alpha = alpha
    @beta1 = beta1
    @beta2 = beta2
    @epsilon = epsilon
    @m = {}
    @v = {}
    @steps = Hash.new(0)
  end

  # Computes the step for one parameter from its current gradient.
  #
  # @param parameter_id [Object] hash key identifying the parameter
  # @param gradient [Numeric]
  # @return [Float] alpha * m_hat / (sqrt(v_hat) + epsilon); the caller
  #   decides whether to add or subtract it
  def update(parameter_id, gradient)
    # Built lazily so instances that were created without @steps still work.
    @steps ||= Hash.new(0)
    step = (@steps[parameter_id] += 1)
    first = @beta1 * (@m[parameter_id] || 0) + (1 - @beta1) * gradient
    second = @beta2 * (@v[parameter_id] || 0) + (1 - @beta2) * gradient**2
    @m[parameter_id] = first
    @v[parameter_id] = second
    m_hat = first / (1 - @beta1**step)
    v_hat = second / (1 - @beta2**step)
    @alpha * m_hat / (Math.sqrt(v_hat) + @epsilon)
  end

  # Forgets all moment estimates and timesteps.
  def reset
    @m.clear
    @v.clear
    @steps = Hash.new(0)
  end
end
173
- ### CROSS-VALIDATION ###
174
# K-fold cross-validation driver.
#
# Sample indices are shuffled and split into k_folds folds. When the sample
# count is not divisible by k_folds, the first folds each get one extra
# sample, so every sample is tested exactly once. Previously the remainder was
# silently left out of every test fold. The block is called once per fold and
# must return that fold's numeric error.
#
# @param data_input [Array] samples
# @param data_output [Array] targets aligned with data_input
# @param k_folds [Integer] number of folds (positive)
# @yieldparam train_x [Array]
# @yieldparam train_y [Array]
# @yieldparam test_x [Array]
# @yieldparam test_y [Array]
# @yieldreturn [Numeric] error measured on the test fold
# @return [Hash] :errors (per fold), :average, :deviation (population std dev)
# @raise [ArgumentError] if k_folds is not a positive Integer
def self.cross_validation(data_input, data_output, k_folds = 5)
  unless k_folds.is_a?(Integer) && k_folds > 0
    raise ArgumentError, 'k_folds must be a positive integer'
  end

  indices = (0...data_input.size).to_a.shuffle
  base_size, remainder = data_input.size.divmod(k_folds)
  errors = []
  start = 0
  k_folds.times do |fold|
    size = base_size + (fold < remainder ? 1 : 0)
    indices_test = indices[start, size]
    indices_train = indices[0, start] + indices[(start + size)..-1]
    start += size

    train_x = indices_train.map { |idx| data_input[idx] }
    train_y = indices_train.map { |idx| data_output[idx] }
    test_x = indices_test.map { |idx| data_input[idx] }
    test_y = indices_test.map { |idx| data_output[idx] }

    errors << yield(train_x, train_y, test_x, test_y)
  end

  average = errors.sum / errors.size.to_f
  {
    errors: errors,
    average: average,
    deviation: Math.sqrt(errors.map { |e| (e - average)**2 }.sum / errors.size.to_f)
  }
end
198
- ### ANALYSIS AND VISUALIZATION ###
199
# Summarizes the absolute neuron deltas of a model.
#
# A model responding to `layers` is read directly; otherwise a model
# responding to `subnets` has the layers of each subnet read. Neurons whose
# delta is nil/false are skipped.
#
# @param model [#layers, #subnets]
# @return [Hash] :average, :maximum, :minimum, :deviation, :total_parameters,
#   or { message: 'No gradients to analyze' } when nothing was collected
def self.analyze_gradients(model)
  layer_groups =
    if model.respond_to?(:layers)
      [model.layers]
    elsif model.respond_to?(:subnets)
      model.subnets.map(&:layers)
    else
      []
    end

  magnitudes = layer_groups.flat_map do |layers|
    layers.flat_map do |layer|
      layer.neurons.map(&:delta).select { |delta| delta }.map(&:abs)
    end
  end
  return { message: 'No gradients to analyze' } if magnitudes.empty?

  count = magnitudes.size
  mean = magnitudes.sum / count.to_f
  spread = Math.sqrt(magnitudes.map { |g| (g - mean)**2 }.sum / count.to_f)
  {
    average: mean,
    maximum: magnitudes.max,
    minimum: magnitudes.min,
    deviation: spread,
    total_parameters: count
  }
end
226
# Prints an ASCII sketch of a model between two 50-character rulers.
#
# Models responding to `subnets` get one titled section per subnet; any other
# model is drawn as a single network.
#
# @param model [#subnets, #layers]
# @return [nil]
def self.plot_architecture_ascii(model)
  ruler = '=' * 50
  puts "\nNetwork Architecture:"
  puts ruler
  if model.respond_to?(:subnets)
    model.subnets.each.with_index(1) do |subnet, number|
      puts "\nSubnet #{number}:"
      plot_individual_network(subnet)
    end
  else
    plot_individual_network(model)
  end
  puts ruler
end
241
# Prints one line per layer with a dot per neuron and the activation name.
#
# Layers with more than ten neurons are abbreviated to eight dots followed by
# the neuron count. A layer without an activation is labelled :linear. An
# arrow line is printed between consecutive layers.
#
# @param network [#layers]
# @return [Array] the network's layers (result of the iteration)
def self.plot_individual_network(network)
  last_position = network.layers.size - 1
  network.layers.each_with_index do |layer, position|
    count = layer.neurons.size
    label = layer.activation || :linear
    dots = count <= 10 ? '●' * count : '●' * 8 + "... (#{count})"
    puts " Layer #{position + 1}: #{dots} [#{label}]"
    puts " #{' ' * 8}↓" unless position == last_position
  end
end
255
- ### ADVANCED PREPROCESSING ###
256
# Simplified PCA preparation: centers the data and computes the sample
# covariance matrix. No eigen-decomposition is performed.
#
# NOTE(review): `centered_data` in the result holds the first `components`
# centered ROWS (samples), exactly as before; it is not a projection onto
# principal components.
#
# Fewer than two samples are rejected, because the (n - 1) divisor would
# otherwise produce a covariance matrix full of NaN. The matrix is filled
# symmetrically, so each pair of columns is only summed once.
#
# @param data [Array<Array<Numeric>>] rows of equal length
# @param components [Integer] number of centered rows to return
# @return [Hash] :means, :covariance, :centered_data
# @raise [ArgumentError] if data has fewer than two rows
def self.pca(data, components = 2)
  n = data.size
  raise ArgumentError, 'PCA requires at least two samples' if n < 2

  m = data.first.size
  means = Array.new(m) { |i| data.sum { |row| row[i] }.to_f / n }
  centered_data = data.map { |row| row.zip(means).map { |v, mean| v - mean } }

  covariance = Array.new(m) { Array.new(m, 0) }
  m.times do |i|
    (i...m).each do |j|
      value = centered_data.sum { |row| row[i] * row[j] } / (n - 1).to_f
      covariance[i][j] = value
      covariance[j][i] = value
    end
  end

  puts ' Simplified PCA - For complete analysis use specialized libraries'
  {
    means: means,
    covariance: covariance,
    centered_data: centered_data[0...components]
  }
end
278
- ### UTILITY FUNCTIONS ###
279
# Shuffles the samples and splits them into a training and a test part.
#
# When a seed is given the global random generator is reseeded with it before
# shuffling.
#
# @param data_x [Array] samples
# @param data_y [Array] targets aligned with data_x
# @param training_ratio [Numeric] fraction of samples used for training
# @param seed [Integer, nil]
# @return [Hash] :train_x, :train_y, :test_x, :test_y
def self.split_data(data_x, data_y, training_ratio = 0.8, seed = nil)
  srand(seed) if seed
  order = (0...data_x.size).to_a.shuffle
  boundary = (data_x.size * training_ratio).to_i
  train_order = order[0...boundary]
  test_order = order[boundary..-1]
  {
    train_x: train_order.map { |position| data_x[position] },
    train_y: train_order.map { |position| data_y[position] },
    test_x: test_order.map { |position| data_x[position] },
    test_y: test_order.map { |position| data_y[position] }
  }
end
290
# Evaluates every configuration in param_grid and keeps the lowest score.
#
# The block receives (params, data_x, data_y) and returns a numeric score
# where lower is better. A configuration whose block raises a StandardError is
# recorded with an infinite score and its error message, and the search
# continues. Progress is printed to standard output.
#
# @param data_x [Object] passed through to the block
# @param data_y [Object] passed through to the block
# @param param_grid [Array] configurations to try
# @yieldreturn [Numeric] score of the configuration
# @return [Hash] :parameters (best), :score (best), :all_results (sorted by score)
def self.hyperparameter_search(data_x, data_y, param_grid)
  winner = nil
  lowest = Float::INFINITY
  attempts = []
  total = param_grid.size
  puts ' Starting hyperparameter search...'

  param_grid.each.with_index(1) do |params, number|
    puts " Testing configuration #{number}/#{total}: #{params}"
    begin
      score = yield(params, data_x, data_y)
      attempts << { parameters: params, score: score }
      unless score < lowest
        puts " Score: #{score.round(6)}"
        next
      end
      lowest = score
      winner = params
      puts " New best configuration found! Score: #{score.round(6)}"
    rescue StandardError => e
      puts " Error with this configuration: #{e.message}"
      attempts << { parameters: params, score: Float::INFINITY, error: e.message }
    end
  end

  puts "\nBest parameters found:"
  puts " Configuration: #{winner}"
  puts " Score: #{lowest.round(6)}"
  {
    parameters: winner,
    score: lowest,
    all_results: attempts.sort_by { |attempt| attempt[:score] }
  }
end
322
# Generates random samples and labels for experiments.
#
# Each feature is a uniform value in [-1, 1) plus centered uniform noise. Each
# label is a one-element array holding the weighted sum
# sum(x_i * (i + 1) * 0.1) plus centered uniform noise. When a seed is given
# the global random generator is reseeded first.
#
# @param n_samples [Integer]
# @param n_features [Integer]
# @param noise [Numeric] width of the noise interval
# @param seed [Integer, nil]
# @return [Hash] :data (n_samples x n_features), :labels (n_samples x 1)
def self.generate_synthetic_data(n_samples, n_features, noise = 0.1, seed = nil)
  srand(seed) if seed

  samples = Array.new(n_samples) do
    Array.new(n_features) do
      base = rand * 2 - 1
      jitter = rand * noise - noise / 2
      base + jitter
    end
  end

  targets = samples.map do |features|
    weighted = features.each_with_index.sum { |x, i| x * (i + 1) * 0.1 }
    jitter = rand * noise - noise / 2
    [weighted + jitter]
  end

  { data: samples, labels: targets }
end
335
- ### NEURON CLASS ###
336
# A single neuron: weighted sum of its inputs plus a bias, passed through an
# activation function.
#
# Fixes compared with the previous version:
# * The tanh and relu derivatives are evaluated on the pre-activation sum.
#   The tanh case used to receive the activated output and so computed
#   1 - tanh(tanh(s))^2.
# * When dropout was applied in the forward pass, the derivative is scaled by
#   the same mask, so a dropped neuron contributes no gradient.
class Neuron
  attr_accessor :weights, :bias, :output, :delta

  # @param inputs [Integer] number of inputs (positive)
  # @param activation [Symbol] :tanh, :relu, :sigmoid, :leaky_relu, :swish or
  #   :gelu; anything else leaves the sum unchanged (linear)
  # @raise [ArgumentError] if inputs is not a positive Integer
  def initialize(inputs, activation = :tanh)
    unless inputs.is_a?(Integer) && inputs > 0
      raise ArgumentError,
            'Number of inputs must be a positive integer'
    end

    # ReLU-family activations use He initialization, everything else Xavier.
    initializer = %i[relu leaky_relu].include?(activation) ? :he_init : :xavier_init
    @weights = Array.new(inputs) { GRYDRA.public_send(initializer, inputs) }
    @bias = GRYDRA.public_send(initializer, inputs)
    @output = 0
    @delta = 0
    @activation = activation
    @sum = 0
    @dropout_mask = nil
  end

  # Forward pass for one input vector.
  #
  # @param inputs [Array<Numeric>] must have as many elements as weights
  # @param apply_dropout [Boolean] apply inverted dropout to the output
  # @param dropout_rate [Numeric] probability of zeroing the output
  # @return [Numeric] the (possibly dropped or rescaled) activation
  # @raise [ArgumentError] if inputs is not an array of numbers
  # @raise [RuntimeError] if the input size does not match the weights
  def calculate_output(inputs, apply_dropout = false, dropout_rate = 0.5)
    unless inputs.is_a?(Array) && inputs.all? { |e| e.is_a?(Numeric) }
      raise ArgumentError,
            'Inputs must be an array of numbers'
    end
    raise "Error: inputs (#{inputs.size}) do not match weights (#{@weights.size})" if @weights.size != inputs.size

    @sum = @weights.zip(inputs).map { |weight, input| weight * input }.sum + @bias
    @output = case @activation
              when :tanh then GRYDRA.tanh(@sum)
              when :relu then GRYDRA.relu(@sum)
              when :sigmoid then GRYDRA.sigmoid(@sum)
              when :leaky_relu then GRYDRA.leaky_relu(@sum)
              when :swish then GRYDRA.swish(@sum)
              when :gelu then GRYDRA.gelu(@sum)
              else @sum
              end
    if apply_dropout
      @dropout_mask = rand < dropout_rate ? 0 : 1.0 / (1 - dropout_rate)
      @output *= @dropout_mask
    else
      # A stale mask from an earlier training pass must not leak into the
      # derivative of a pass that ran without dropout.
      @dropout_mask = nil
    end
    @output
  end

  # Derivative of the output with respect to the pre-activation sum of the
  # most recent forward pass, including the dropout mask when one was applied.
  #
  # @return [Numeric]
  def derivative_activation
    slope = case @activation
            when :tanh then GRYDRA.derivative_tanh(@sum)
            when :relu then GRYDRA.derivative_relu(@sum)
            when :sigmoid then GRYDRA.derivative_sigmoid(@sum)
            when :leaky_relu then GRYDRA.derivative_leaky_relu(@sum)
            when :swish then GRYDRA.derivative_swish(@sum)
            when :gelu then GRYDRA.derivative_gelu(@sum)
            else 1
            end
    @dropout_mask ? slope * @dropout_mask : slope
  end
end
395
- ### BASE LAYER CLASS ###
396
# Abstract base class for layers; subclasses provide the forward pass.
class Layer
  # @param inputs [Object] layer input (unused here)
  # @raise [NotImplementedError] always
  def calculate_outputs(inputs)
    message = 'Implement in subclass'
    raise NotImplementedError, message
  end
end
401
- ### DENSE LAYER CLASS ###
402
# Fully connected layer: every neuron receives the complete input vector.
class DenseLayer < Layer
  attr_accessor :neurons, :activation

  # @param num_neurons [Integer] number of neurons in the layer
  # @param inputs_per_neuron [Integer] size of the input vector
  # @param activation [Symbol] activation shared by all neurons
  def initialize(num_neurons, inputs_per_neuron, activation = :tanh)
    @activation = activation
    @neurons = Array.new(num_neurons) do
      Neuron.new(inputs_per_neuron, activation)
    end
  end

  # Forward pass: one output per neuron, in neuron order.
  #
  # @param inputs [Array<Numeric>]
  # @param apply_dropout [Boolean]
  # @param dropout_rate [Numeric]
  # @return [Array<Numeric>]
  def calculate_outputs(inputs, apply_dropout = false, dropout_rate = 0.5)
    @neurons.map do |unit|
      unit.calculate_output(inputs, apply_dropout, dropout_rate)
    end
  end
end
412
# Placeholder convolutional layer. It stores its configuration and randomly
# initialized filter weights, but the forward pass only halves every input
# value after printing a warning.
class ConvLayer < Layer
  attr_accessor :filters, :kernel_size, :stride, :padding

  # @param filters [Integer] number of filters
  # @param kernel_size [Integer] side length of the square kernel
  # @param stride [Integer]
  # @param padding [Integer]
  # @param activation [Symbol]
  def initialize(filters, kernel_size, stride = 1, padding = 0, activation = :relu)
    @filters = filters
    @kernel_size = kernel_size
    @stride = stride
    @padding = padding
    @activation = activation
    cells_per_kernel = kernel_size * kernel_size
    # Weights and biases start as uniform values in [-0.05, 0.05).
    @weights = Array.new(filters) do
      Array.new(cells_per_kernel) { rand * 0.1 - 0.05 }
    end
    @biases = Array.new(filters) { rand * 0.1 - 0.05 }
  end

  # Placeholder forward pass: warns, then returns every input value times 0.5.
  #
  # @param input [Array<Numeric>]
  # @return [Array<Numeric>]
  def calculate_outputs(input)
    puts '⚠️ Simplified Convolutional Layer - For full use, implement 2D convolution'
    input.map { |value| value * 0.5 }
  end
end
430
# Simplified LSTM layer built from four DenseLayer gates. The cell and hidden
# state are kept between calls; every forward pass prints a warning.
class LSTMLayer < Layer
  attr_accessor :units

  # @param units [Integer] number of LSTM units (size of the hidden state)
  # @param inputs_per_unit [Integer] size of the external input vector
  def initialize(units, inputs_per_unit)
    @units = units
    @inputs_per_unit = inputs_per_unit
    # Each gate sees the external input concatenated with the hidden state.
    gate_inputs = inputs_per_unit + units
    @forget_gate = DenseLayer.new(units, gate_inputs, :sigmoid)
    @input_gate = DenseLayer.new(units, gate_inputs, :sigmoid)
    @output_gate = DenseLayer.new(units, gate_inputs, :sigmoid)
    @candidates = DenseLayer.new(units, gate_inputs, :tanh)
    @cell_state = Array.new(units, 0)
    @hidden_state = Array.new(units, 0)
  end

  # One time step: updates the cell and hidden state and returns the new
  # hidden state.
  #
  # @param input [Array<Numeric>]
  # @return [Array<Numeric>] hidden state after this step
  def calculate_outputs(input)
    puts '⚠️ Simplified LSTM Layer - For full use, implement all gates'
    gate_input = input + @hidden_state

    forget = @forget_gate.calculate_outputs(gate_input)
    write = @input_gate.calculate_outputs(gate_input)
    expose = @output_gate.calculate_outputs(gate_input)
    proposal = @candidates.calculate_outputs(gate_input)

    @cell_state = @cell_state.each_index.map do |unit|
      forget[unit] * @cell_state[unit] + write[unit] * proposal[unit]
    end
    @hidden_state = expose.zip(@cell_state).map { |gate, cell| gate * Math.tanh(cell) }
    @hidden_state
  end
end
461
- ### NEURAL NETWORK CLASS ###
462
# Feed-forward network of DenseLayer objects trained by per-sample
# backpropagation.
#
# Changes compared with the previous version:
# * The L1 step no longer treats a weight of exactly zero as negative; zero
#   weights are left alone by the L1 term.
# * Mismatched sample/target counts and too-short target vectors raise an
#   ArgumentError up front instead of failing later with nil arithmetic.
# * #train is split into private helpers; the update order is unchanged.
class NeuralNetwork
  attr_accessor :layers

  # @param structure [Array<Integer>] layer sizes, input size first
  # @param print_epochs [Boolean] print the error after every epoch
  # @param plot [Boolean] record the per-epoch error and pass it to
  #   GRYDRA.plot_error after training
  # @param activations [Array<Symbol>, nil] one activation per layer; :tanh
  #   for every layer when nil
  def initialize(structure, print_epochs = false, plot = false, activations = nil)
    @print_epochs = print_epochs
    @plot = plot
    @layers = []
    @history_error = []
    @optimizer = nil
    activations ||= Array.new(structure.size - 1, :tanh)
    structure.each_cons(2).with_index do |(inputs, outputs), i|
      @layers << DenseLayer.new(outputs, inputs, activations[i])
    end
  end

  # Switches weight updates from plain gradient steps to Adam.
  def use_adam_optimizer(alpha = 0.001, beta1 = 0.9, beta2 = 0.999)
    @optimizer = AdamOptimizer.new(alpha, beta1, beta2)
  end

  # Forward pass through all layers.
  #
  # @param inputs [Array<Numeric>]
  # @return [Array<Numeric>] outputs of the last layer
  # @raise [ArgumentError] if inputs is not an array of numbers
  def calculate_outputs(inputs, apply_dropout = false, dropout_rate = 0.5)
    raise ArgumentError, 'Inputs must be an array of numbers' unless inputs.is_a?(Array) && inputs.all? { |e| e.is_a?(Numeric) }

    @layers.inject(inputs) { |outputs, layer| layer.calculate_outputs(outputs, apply_dropout, dropout_rate) }
  end

  # Trains the network with early stopping, learning-rate decay and optional
  # L1/L2 regularization and dropout.
  #
  # Samples are reshuffled every epoch. Weights are updated after every single
  # sample; batch_size only controls how the shuffled data is sliced.
  #
  # @param data_input [Array<Array<Numeric>>] training samples
  # @param data_output [Array<Array<Numeric>>] targets aligned with data_input
  # @param learning_rate [Numeric] initial step size, multiplied by decay
  #   after every completed epoch
  # @param epochs [Integer] maximum number of epochs
  # @param error_threshold [Numeric, nil] stop once the epoch error is below it
  # @param patience [Integer] epochs without improvement before stopping
  # @raise [ArgumentError] if sample and target counts differ or a target is
  #   shorter than the network output
  def train(data_input, data_output, learning_rate, epochs, error_threshold = nil,
            batch_size: 1, patience: 50, decay: 0.95, lambda_l1: 0, lambda_l2: 0,
            dropout: false, dropout_rate: 0.5)
    unless data_input.size == data_output.size
      raise ArgumentError,
            "Number of input samples (#{data_input.size}) does not match number of outputs (#{data_output.size})"
    end

    best_error = Float::INFINITY
    patience_counter = 0
    epochs.times do |epoch|
      error_total = 0
      error_regularization = 0
      # Shuffle data
      indices = (0...data_input.size).to_a.shuffle
      data_input = indices.map { |i| data_input[i] }
      data_output = indices.map { |i| data_output[i] }

      data_input.each_slice(batch_size).with_index do |batch_inputs, batch_idx|
        batch_outputs_real = data_output[batch_idx * batch_size, batch_size]
        batch_inputs.zip(batch_outputs_real).each do |input, output_real|
          outputs = calculate_outputs(input, dropout, dropout_rate)
          unless output_real.is_a?(Array) && output_real.size >= outputs.size
            raise ArgumentError, "Expected a target array with #{outputs.size} values, got #{output_real.inspect}"
          end

          errors = outputs.zip(output_real).map { |output, real| real - output }
          error_total += errors.map { |e| e**2 }.sum / errors.size
          error_regularization += regularization_penalty(lambda_l1, lambda_l2)
          backpropagate(errors)
          update_parameters(input, learning_rate, lambda_l1, lambda_l2)
        end
      end

      error_total += error_regularization
      if error_threshold && error_total < error_threshold
        puts "Error threshold reached at epoch #{epoch + 1}: #{error_total}"
        break
      end
      if error_total < best_error
        best_error = error_total
        patience_counter = 0
      else
        patience_counter += 1
        if patience_counter >= patience
          puts "Early stopping at epoch #{epoch + 1}, error has not improved for #{patience} epochs."
          break
        end
      end
      learning_rate *= decay
      @history_error << error_total if @plot
      if @print_epochs
        puts "Epoch #{epoch + 1}, Total Error: #{error_total.round(6)}, learning rate: #{learning_rate.round(6)}"
      end
    end
    # NOTE(review): GRYDRA.plot_error is not defined in the visible part of
    # the file; presumably it lives further down. Confirm.
    GRYDRA.plot_error(@history_error) if @plot
  end

  # Prints every layer with its neurons' rounded weights and bias.
  def info_network
    puts "Neural network with #{@layers.size} layers:"
    @layers.each_with_index do |layer, i|
      puts " Layer #{i + 1}: #{layer.neurons.size} neurons, activation: #{layer.activation}"
      layer.neurons.each_with_index do |neuron, j|
        puts " Neuron #{j + 1}: Weights=#{neuron.weights.map { |p| p.round(3) }}, Bias=#{neuron.bias.round(3)}"
      end
    end
  end

  # Export to DOT for Graphviz
  #
  # @param filename [String] path of the DOT file to write
  def export_graphviz(filename = 'neural_network.dot')
    File.open(filename, 'w') do |f|
      f.puts 'digraph NeuralNetwork {'
      @layers.each_with_index do |layer, i|
        layer.neurons.each_with_index do |_neuron, j|
          node = "L#{i}_N#{j}"
          f.puts " #{node} [label=\"N#{j + 1}\"];"
          next unless i < @layers.size - 1

          @layers[i + 1].neurons.each_with_index do |next_neuron, k|
            weight = next_neuron.weights[j].round(3)
            f.puts " #{node} -> L#{i + 1}_N#{k} [label=\"#{weight}\"];"
          end
        end
      end
      f.puts '}'
    end
    puts "Network exported to #{filename} (Graphviz DOT)"
  end

  private

  # Sum of the enabled L1/L2 penalties over every neuron's weights.
  def regularization_penalty(lambda_l1, lambda_l2)
    return 0 unless lambda_l1 > 0 || lambda_l2 > 0

    @layers.sum do |layer|
      layer.neurons.sum do |neuron|
        penalty = 0
        penalty += GRYDRA.l1_regularization(neuron.weights, lambda_l1) if lambda_l1 > 0
        penalty += GRYDRA.l2_regularization(neuron.weights, lambda_l2) if lambda_l2 > 0
        penalty
      end
    end
  end

  # Sets every neuron's delta: output layer from the errors, hidden layers
  # from the deltas of the layer above.
  def backpropagate(errors)
    @layers.last.neurons.each_with_index do |neuron, i|
      neuron.delta = errors[i] * neuron.derivative_activation
    end
    (@layers.size - 2).downto(0) do |i|
      @layers[i].neurons.each_with_index do |neuron, j|
        sum_deltas = @layers[i + 1].neurons.sum { |n| n.weights[j] * n.delta }
        neuron.delta = sum_deltas * neuron.derivative_activation
      end
    end
  end

  # Applies one update to every weight and bias using the current deltas.
  def update_parameters(input, learning_rate, lambda_l1, lambda_l2)
    @layers.each_with_index do |layer, idx|
      inputs_layer = idx.zero? ? input : @layers[idx - 1].neurons.map(&:output)
      layer.neurons.each_with_index do |neuron, neuron_idx|
        neuron.weights.each_index do |i|
          gradient = neuron.delta * inputs_layer[i]
          neuron.weights[i] += parameter_step("layer_#{idx}_neuron_#{neuron_idx}_weight_#{i}", gradient, learning_rate)
          # Apply regularization to weights
          neuron.weights[i] -= learning_rate * lambda_l1 * weight_sign(neuron.weights[i]) if lambda_l1 > 0
          neuron.weights[i] -= learning_rate * lambda_l2 * 2 * neuron.weights[i] if lambda_l2 > 0
        end
        neuron.bias += parameter_step("layer_#{idx}_neuron_#{neuron_idx}_bias", neuron.delta, learning_rate)
      end
    end
  end

  # Step for one parameter: Adam when configured, plain gradient step otherwise.
  def parameter_step(param_id, gradient, learning_rate)
    @optimizer ? @optimizer.update(param_id, gradient) : learning_rate * gradient
  end

  # Sign used by the L1 term: 1, -1, or 0 for a weight of exactly zero.
  def weight_sign(weight)
    if weight > 0
      1
    elsif weight < 0
      -1
    else
      0
    end
  end
end
603
- ### MAIN NETWORK ###
604
# Ensemble of NeuralNetwork subnets whose outputs are combined per output
# position.
class MainNetwork
  attr_accessor :subnets

  # @param print_epochs [Boolean] forwarded to every subnet
  # @param plot [Boolean] forwarded to every subnet
  def initialize(print_epochs = false, plot = false)
    @print_epochs = print_epochs
    @plot = plot
    @subnets = []
  end

  # Creates a NeuralNetwork with the given structure and appends it.
  #
  # @param structure [Array<Integer>] layer sizes, input size first
  # @param activations [Array<Symbol>, nil]
  # @return [Array] the list of subnets
  def add_subnet(structure, activations = nil)
    subnet = NeuralNetwork.new(structure, @print_epochs, @plot, activations)
    @subnets.push(subnet)
  end

  # Trains the subnet at each position with the data set at that position.
  #
  # @param data [Array<Hash>] one { input:, output: } hash per subnet
  # @param learning_rate [Numeric]
  # @param epochs [Integer]
  # @param opts [Hash] keyword options forwarded to NeuralNetwork#train
  def train_subnets(data, learning_rate, epochs, **opts)
    data.each_with_index do |subnet_data, position|
      puts "Training Subnet #{position + 1}..."
      subnet = @subnets[position]
      subnet.train(subnet_data[:input], subnet_data[:output], learning_rate, epochs, **opts)
    end
  end

  # Plain average of all subnet outputs, per output position.
  #
  # @param input_main [Array<Numeric>]
  # @return [Array<Numeric>]
  def combine_results(input_main)
    per_subnet = @subnets.map { |subnet| subnet.calculate_outputs(input_main) }
    per_subnet.transpose.map do |values|
      values.sum / values.size
    end
  end

  # Weighted sum of all subnet outputs, per output position.
  #
  # @param input_main [Array<Numeric>]
  # @param weights [Array<Numeric>, nil] one weight per subnet; equal weights
  #   when nil
  # @return [Array<Numeric>]
  def combine_results_weighted(input_main, weights = nil)
    per_subnet = @subnets.map { |subnet| subnet.calculate_outputs(input_main) }
    weights ||= Array.new(@subnets.size, 1.0 / @subnets.size)
    per_subnet.transpose.map do |values|
      values.zip(weights).map { |value, weight| value * weight }.sum
    end
  end
end
632
- ### SAVE AND LOAD MODEL AND VOCABULARY ###
633
# Serializes a model with Marshal into "<path>/<name>.net" and, when a
# vocabulary is given, stores it alongside via save_vocabulary.
#
# @param model [Object] any Marshal-serializable object
# @param name [String] file name without extension
# @param path [String] target directory
# @param vocabulary [Object, nil]
# @return [nil]
def self.save_model(model, name, path = Dir.pwd, vocabulary = nil)
  file_path = File.join(path, "#{name}.net")
  File.binwrite(file_path, Marshal.dump(model))
  puts "\e[33mModel saved to '#{file_path}'\e[0m"
  save_vocabulary(vocabulary, name, path) if vocabulary
end
642
# Loads a model previously written by save_model from "<path>/<name>.net".
#
# WARNING: Marshal.load can instantiate arbitrary objects. Only load files
# that come from a trusted source.
#
# @param name [String] file name without extension
# @param path [String] directory containing the file
# @return [Object] the deserialized model
def self.load_model(name, path = Dir.pwd)
  location = File.join(path, "#{name}.net")
  File.open(location, 'rb') { |io| Marshal.load(io) }
end
648
# Serializes a vocabulary with Marshal into "<path>/<name>_vocab.bin".
#
# @param vocabulary [Object] any Marshal-serializable object
# @param name [String] base file name
# @param path [String] target directory
# @return [nil]
def self.save_vocabulary(vocabulary, name, path = Dir.pwd)
  file_path = File.join(path, "#{name}_vocab.bin")
  File.binwrite(file_path, Marshal.dump(vocabulary))
  puts "\e[33mVocabulary saved to '#{file_path}'\e[0m"
end
653
# Loads a vocabulary previously written by save_vocabulary from
# "<path>/<name>_vocab.bin".
#
# WARNING: Marshal.load can instantiate arbitrary objects. Only load files
# that come from a trusted source.
#
# @param name [String] base file name
# @param path [String] directory containing the file
# @return [Object] the deserialized vocabulary
def self.load_vocabulary(name, path = Dir.pwd)
  location = File.join(path, "#{name}_vocab.bin")
  File.open(location, 'rb') { |io| Marshal.load(io) }
end
659
- ### UPDATED PREPROCESSING ###
660
# Normalizes rows column by column with statistics from calculate_max_values.
#
# * :max    - divides each value by the column's maximum
# * :zscore - subtracts the column mean and divides by its standard deviation
#
# A zero divisor (zero maximum or zero deviation) yields 0.0 in both modes.
# Previously the :max mode produced NaN or Infinity for a column whose maximum
# was 0.
#
# @param data [Array<Array<Numeric>>]
# @param max_values [Hash, Array] for :max anything indexable by column
#   position; for :zscore a Hash with :means and :std_devs
# @param method [Symbol] :max or :zscore
# @return [Array<Array<Float>>]
# @raise [RuntimeError] for an unknown method
def self.normalize_multiple(data, max_values, method = :max)
  case method
  when :max
    data.map do |row|
      row.each_with_index.map do |value, idx|
        divisor = max_values[idx]
        divisor == 0 ? 0.0 : value.to_f / divisor
      end
    end
  when :zscore
    means = max_values[:means]
    std_devs = max_values[:std_devs]
    data.map do |row|
      row.each_with_index.map do |value, idx|
        std_devs[idx] == 0 ? 0.0 : (value.to_f - means[idx]) / std_devs[idx]
      end
    end
  else
    raise 'Unknown normalization method'
  end
end
678
# Computes the per-column statistics consumed by normalize_multiple.
#
# @param data [Array<Array<Numeric>>] rows of equal length
# @param method [Symbol] :max or :zscore
# @return [Hash] for :max a Hash mapping column index to its maximum (Float);
#   for :zscore { means: [...], std_devs: [...] } using the population
#   standard deviation
# @raise [RuntimeError] for an unknown method
def self.calculate_max_values(data, method = :max)
  case method
  when :max
    data.first.size.times.each_with_object({}) do |column, maxima|
      maxima[column] = data.map { |row| row[column] }.max.to_f
    end
  when :zscore
    count = data.size
    columns = data.first.size
    means = columns.times.map do |column|
      data.map { |row| row[column] }.sum.to_f / count
    end
    std_devs = columns.times.map do |column|
      center = means[column]
      Math.sqrt(data.map { |row| (row[column] - center)**2 }.sum.to_f / count)
    end
    { means: means, std_devs: std_devs }
  else
    raise 'Unknown method for calculating max values'
  end
end
699
- ### TEXT FUNCTIONS ###
700
# Builds the list of distinct lower-cased words, in order of first appearance.
#
# @param texts [Array<String>]
# @return [Array<String>]
def self.create_vocabulary(texts)
  words = texts.flat_map { |text| text.split.map(&:downcase) }
  words.uniq
end
703
# Binary bag-of-words vector: position i is 1 when vocabulary[i] occurs in the
# lower-cased text, otherwise 0.
#
# A word-to-position table is built once, so each word costs one hash lookup
# instead of a linear scan of the vocabulary. For a word listed more than once
# in the vocabulary, only its first position is set, as before.
#
# @param text [String]
# @param vocabulary [Array<String>]
# @return [Array<Integer>] vector of vocabulary.size zeros and ones
def self.vectorize_text(text, vocabulary)
  positions = {}
  vocabulary.each_with_index { |word, index| positions[word] ||= index }

  vector = Array.new(vocabulary.size, 0)
  text.downcase.split.each do |word|
    index = positions[word]
    vector[index] = 1 if index
  end
  vector
end
712
# Divides every vector component by the vocabulary size.
#
# @param data [Array<Array<Numeric>>] vectors
# @param vocabulary [Array] only its size is used
# @return [Array<Array<Float>>]
def self.normalize_with_vocabulary(data, vocabulary)
  divisor = vocabulary.size
  data.map do |vector|
    vector.map { |component| component.to_f / divisor }
  end
end
716
# Builds an alphabetically sorted vocabulary from word frequencies.
#
# Words are lower-cased and kept when they occur at least min_frequency times.
# If the result exceeds max_words, the max_words most frequent words are kept
# instead.
#
# @param texts [Array<String>]
# @param min_frequency [Integer]
# @param max_words [Integer, nil] size limit; unlimited when nil
# @return [Array<String>]
def self.create_advanced_vocabulary(texts, min_frequency = 1, max_words = nil)
  counts = Hash.new(0)
  texts.each do |text|
    text.downcase.split.each { |word| counts[word] += 1 }
  end

  kept = counts.keys.select { |word| counts[word] >= min_frequency }
  if max_words && kept.size > max_words
    ranked = counts.sort_by { |_, freq| -freq }
    kept = ranked.first(max_words).map(&:first)
  end
  kept.sort
end
730
# TF-IDF vector of a text over the given vocabulary.
#
# tf  = occurrences of the word in the text / number of words in the text
# idf = log(corpus_frequencies.size / corpus count of the word)
#
# A missing or non-positive corpus count is treated as 1. Previously a count
# of 0 (for example from a Hash.new(0) default) produced an infinite IDF.
#
# NOTE(review): the IDF numerator is the number of entries in
# corpus_frequencies, as in the original; confirm that callers pass one entry
# per document rather than one per distinct word.
#
# @param text [String]
# @param vocabulary [Array<String>]
# @param corpus_frequencies [Hash{String=>Integer}]
# @return [Array<Float>] one weight per vocabulary entry
def self.vectorize_text_tfidf(text, vocabulary, corpus_frequencies)
  words = text.downcase.split
  doc_frequencies = Hash.new(0)
  words.each { |word| doc_frequencies[word] += 1 }
  corpus_size = corpus_frequencies.size.to_f

  vocabulary.map do |word|
    occurrences = doc_frequencies[word]
    next 0.0 unless occurrences > 0

    corpus_count = corpus_frequencies[word]
    corpus_count = 1 if corpus_count.nil? || corpus_count <= 0
    tf = occurrences.to_f / words.size
    tf * Math.log(corpus_size / corpus_count)
  end
end
745
- ### CLASS EasyNetwork (unchanged, just adding zscore normalization and activations option) ###
746
- class EasyNetwork
747
- attr_accessor :network, :vocabulary, :max_values, :max_values_output
748
- def initialize(print_epochs = false, plot = false)
749
- @network = GRYDRA::MainNetwork.new(print_epochs, plot)
750
- @vocabulary = nil
751
- @max_values = {}
752
- @max_values_output = {}
753
- end
754
- def configure_adam_optimizer(alpha = 0.001, beta1 = 0.9, beta2 = 0.999)
755
- @network.subnets.each { |subnet| subnet.use_adam_optimizer(alpha, beta1, beta2) }
756
- end
757
- def evaluate_model(data_test_x, data_test_y, metrics = %i[mse mae])
758
- predictions = predict_numerical(data_test_x)
759
- results = {}
760
- predictions_flat = predictions.flatten
761
- actuals_flat = data_test_y.flatten
762
- metrics.each do |metric|
763
- case metric
764
- when :mse
765
- results[:mse] = GRYDRA.mse(predictions_flat, actuals_flat)
766
- when :mae
767
- results[:mae] = GRYDRA.mae(predictions_flat, actuals_flat)
768
- when :accuracy
769
- results[:accuracy] = GRYDRA.accuracy(predictions_flat, actuals_flat)
770
- when :confusion_matrix
771
- results[:confusion_matrix] = GRYDRA.confusion_matrix(predictions_flat, actuals_flat)
772
- end
773
- end
774
- results
775
- end
776
- # --------- For hash-type data ---------
777
- def train_hashes(data_hash, input_keys, label_key, structures, rate, epochs, normalization = :max,
778
- **opts)
779
- @network.subnets.clear # clear previous subnets
780
- inputs = data_hash.map do |item|
781
- input_keys.map do |key|
782
- value = item[key]
783
- if value == true
784
- 1.0
785
- else
786
- value == false ? 0.0 : value.to_f
787
- end
788
- end
789
- end
790
- @max_values = GRYDRA.calculate_max_values(inputs, normalization)
791
- data_normalized = GRYDRA.normalize_multiple(inputs, @max_values, normalization)
792
- labels = data_hash.map { |item| [item[label_key].to_f] }
793
- @max_values_output = GRYDRA.calculate_max_values(labels, normalization)
794
- labels_no = GRYDRA.normalize_multiple(labels, @max_values_output, normalization)
795
- structures.each do |structure|
796
- @network.add_subnet([input_keys.size, *structure])
797
- end
798
- data_for_subnets = structures.map do |_|
799
- { input: data_normalized, output: labels_no }
800
- end
801
- @network.train_subnets(data_for_subnets, rate, epochs, **opts)
802
- end
803
- def predict_hashes(new_hashes, input_keys, normalization = :max)
804
- inputs = new_hashes.map do |item|
805
- input_keys.map do |key|
806
- value = item[key]
807
- if value == true
808
- 1.0
809
- else
810
- value == false ? 0.0 : value.to_f
811
- end
812
- end
813
- end
814
- data_normalized = GRYDRA.normalize_multiple(inputs, @max_values, normalization)
815
- data_normalized.map do |input|
816
- pred_norm = @network.combine_results(input)
817
- if normalization == :zscore && @max_values_output.is_a?(Hash)
818
- pred_norm.map.with_index do |val, idx|
819
- val * @max_values_output[:std_devs][idx] + @max_values_output[:means][idx]
820
- end
821
- else
822
- pred_norm.map.with_index { |val, idx| val * @max_values_output[idx] }
823
- end
824
- end
825
- end
826
- # --------- For numerical data ---------
827
- def train_numerical(data_input, data_output, structures, rate, epochs, normalization = :max, **opts)
828
- @network.subnets.clear # clear previous subnets
829
- @max_values = GRYDRA.calculate_max_values(data_input, normalization)
830
- @max_values_output = GRYDRA.calculate_max_values(data_output, normalization)
831
- data_input_no = GRYDRA.normalize_multiple(data_input, @max_values, normalization)
832
- data_output_no = GRYDRA.normalize_multiple(data_output, @max_values_output, normalization)
833
- structures.each do |structure|
834
- @network.add_subnet([data_input.first.size, *structure])
835
- end
836
- data_for_subnets = structures.map do |_|
837
- { input: data_input_no, output: data_output_no }
838
- end
839
- @network.train_subnets(data_for_subnets, rate, epochs, **opts)
840
- end
841
# Predicts outputs for new numerical rows and converts them back to the
# original output scale.
#
# Inputs are normalized with the statistics stored in @max_values. Each
# combined network output is then rescaled with @max_values_output: for
# :zscore (when the stored statistics are a Hash) as value * std_dev + mean,
# otherwise as value * stored factor at the same index.
#
# @param new_data [Array<Array<Numeric>>] rows to predict
# @param normalization [Symbol] normalization method (default :max)
# @return [Array<Array>] one list of rescaled outputs per input row
def predict_numerical(new_data, normalization = :max)
  scaled_rows = GRYDRA.normalize_multiple(new_data, @max_values, normalization)

  scaled_rows.map do |row|
    raw_outputs = @network.combine_results(row)
    zscore_stats = normalization == :zscore && @max_values_output.is_a?(Hash)

    raw_outputs.each_with_index.map do |value, position|
      if zscore_stats
        value * @max_values_output[:std_devs][position] + @max_values_output[:means][position]
      else
        value * @max_values_output[position]
      end
    end
  end
end
854
- # --------- For text data ---------
855
# Trains one subnet per entry of +structures+ on vectorized texts.
#
# A vocabulary is built from +texts+ and stored in @vocabulary; every text
# is turned into a vector with GRYDRA.vectorize_text. The input layer of
# each subnet has one neuron per vocabulary word.
#
# @param texts [Array<String>] training texts
# @param labels [Array<Array<Numeric>>] target rows
# @param structures [Array<Array<Integer>>] layer sizes that follow the input layer, one list per subnet
# @param rate learning rate, forwarded to @network.train_subnets
# @param epochs number of epochs, forwarded to @network.train_subnets
# @param normalization [Symbol] normalization method handed to the GRYDRA helpers (default :max)
# @param opts extra keyword options forwarded untouched to @network.train_subnets
# @return the result of @network.train_subnets
def train_text(texts, labels, structures, rate, epochs, normalization = :max, **opts)
  # Start from scratch: drop subnets left over from an earlier training run.
  @network.subnets.clear

  @vocabulary = GRYDRA.create_vocabulary(texts)
  word_count = @vocabulary.size
  text_vectors = texts.map { |sentence| GRYDRA.vectorize_text(sentence, @vocabulary) }

  # Text inputs are scaled by the vocabulary size only.
  # NOTE(review): only key 0 is populated; confirm normalize_multiple copes
  # with this for every column and for :zscore.
  @max_values = { 0 => word_count }
  scaled_vectors = GRYDRA.normalize_multiple(text_vectors, @max_values, normalization)

  @max_values_output = GRYDRA.calculate_max_values(labels, normalization)
  scaled_labels = GRYDRA.normalize_multiple(labels, @max_values_output, normalization)

  structures.each { |hidden_layers| @network.add_subnet([@vocabulary.size, *hidden_layers]) }

  # Every subnet is trained on the same normalized data set.
  training_sets = structures.map { { input: scaled_vectors, output: scaled_labels } }
  @network.train_subnets(training_sets, rate, epochs, **opts)
end
871
# Predicts outputs for new texts and converts them back to the original
# label scale.
#
# Texts are vectorized against the vocabulary stored in @vocabulary and
# normalized with @max_values. Each combined network output is rescaled with
# @max_values_output: for :zscore (when the stored statistics are a Hash) as
# value * std_dev + mean, otherwise as value * stored factor at the same index.
#
# @param new_texts [Array<String>] texts to predict
# @param normalization [Symbol] normalization method (default :max)
# @return [Array<Array>] one list of rescaled outputs per text
def predict_text(new_texts, normalization = :max)
  text_vectors = new_texts.map { |sentence| GRYDRA.vectorize_text(sentence, @vocabulary) }

  GRYDRA.normalize_multiple(text_vectors, @max_values, normalization).map do |scaled_vector|
    raw_outputs = @network.combine_results(scaled_vector)
    zscore_stats = normalization == :zscore && @max_values_output.is_a?(Hash)

    raw_outputs.each_with_index.map do |value, position|
      if zscore_stats
        value * @max_values_output[:std_devs][position] + @max_values_output[:means][position]
      else
        value * @max_values_output[position]
      end
    end
  end
end
885
- end
886
# Built-in help catalogue: maps "ClassName.method_name" to a hash holding a
# :description sentence and an :example code snippet. It is read by
# describe_method and list_methods_available.
#
# The table is frozen so the constant cannot be modified by accident at
# runtime.
METHOD_DESCRIPTIONS = {
  # MainNetwork
  'MainNetwork.add_subnet' => {
    description: 'Adds a subnet to the main network with the given structure. The structure defines the number of neurons per layer (including inputs).',
    example: <<~EX
      network = GRYDRA::MainNetwork.new
      network.add_subnet([2, 4, 1]) # 2 inputs, 4 hidden neurons, 1 output
    EX
  },
  'MainNetwork.train_subnets' => {
    description: 'Trains all subnets using input and output data, with learning rate, epochs, and options like patience for early stopping.',
    example: <<~EX
      data = [
      {input: [[0.1, 0.2]], output: [[0.3]]},
      {input: [[0.5, 0.6]], output: [[0.7]]}
      ]
      network = GRYDRA::MainNetwork.new(true)
      network.add_subnet([2, 3, 1])
      network.add_subnet([2, 2, 1])
      network.train_subnets(data, 0.01, 1000, patience: 5, lambda_l1: 0.001, dropout: true)
    EX
  },
  'MainNetwork.combine_results' => {
    description: 'Averages the outputs of all subnets for a given input, generating the final prediction.',
    example: <<~EX
      result = network.combine_results([0.2, 0.8])
    EX
  },
  'MainNetwork.combine_results_weighted' => {
    description: 'Combines the outputs of all subnets using specific weights for each subnet.',
    example: <<~EX
      result = network.combine_results_weighted([0.2, 0.8], [0.6, 0.4])
    EX
  },
  # EasyNetwork (easier interface)
  'EasyNetwork.train_numerical' => {
    description: 'Trains the network with numerical data (arrays of numbers) for input and output. Normalizes, creates subnets, and trains.',
    example: <<~EX
      data_input = [[170, 25], [160, 30], [180, 22]]
      data_output = [[65], [60], [75]]
      structures = [[4, 1], [3, 1]]
      network = GRYDRA::EasyNetwork.new(true)
      network.train_numerical(data_input, data_output, structures, 0.05, 15000, :max,
      lambda_l2: 0.001, dropout: true, dropout_rate: 0.3)
    EX
  },
  'EasyNetwork.predict_numerical' => {
    description: 'Predicts values with new numerical data normalized the same way as training.',
    example: <<~EX
      new_data = [[172, 26]]
      predictions = network.predict_numerical(new_data, :max)
    EX
  },
  'EasyNetwork.configure_adam_optimizer' => {
    description: 'Configures the Adam optimizer for all subnets with customizable parameters.',
    example: <<~EX
      network.configure_adam_optimizer(0.001, 0.9, 0.999)
    EX
  },
  'EasyNetwork.evaluate_model' => {
    description: 'Evaluates the model with test data using multiple metrics.',
    example: <<~EX
      results = network.evaluate_model(test_x, test_y, [:mse, :mae, :accuracy])
    EX
  },
  'EasyNetwork.train_hashes' => {
    description: 'Trains the network with input data in hash format, specifying the keys to use and the label key.',
    example: <<~EX
      data_hash = [
      {height: 170, age: 25, weight: 65},
      {height: 160, age: 30, weight: 60}
      ]
      network = GRYDRA::EasyNetwork.new(true)
      network.train_hashes(data_hash, [:height, :age], :weight, [[4, 1]], 0.05, 15000, :max)
    EX
  },
  'EasyNetwork.predict_hashes' => {
    description: 'Predicts using hash data with the specified keys for input.',
    example: <<~EX
      new_hashes = [{height: 172, age: 26}]
      predictions = network.predict_hashes(new_hashes, [:height, :age], :max)
    EX
  },
  'EasyNetwork.train_text' => {
    description: 'Trains the network with texts and numerical labels, creating a vocabulary to vectorize texts.',
    example: <<~EX
      texts = ["hello world", "good day"]
      labels = [[1], [0]]
      structures = [[5, 1]]
      network = GRYDRA::EasyNetwork.new(true)
      network.train_text(texts, labels, structures, 0.01, 5000)
    EX
  },
  'EasyNetwork.predict_text' => {
    description: 'Predicts with new texts, vectorizing according to the learned vocabulary.',
    example: <<~EX
      new_texts = ["hello"]
      predictions = network.predict_text(new_texts)
    EX
  },
  # New activation functions
  'GRYDRA.leaky_relu' => {
    description: 'Leaky ReLU activation function that allows a small gradient for negative values.',
    example: <<~EX
      result = GRYDRA.leaky_relu(-0.5, 0.01) # -0.005
    EX
  },
  'GRYDRA.swish' => {
    description: 'Swish activation function (x * sigmoid(x)) which is smooth and non-monotonic.',
    example: <<~EX
      result = GRYDRA.swish(1.0)
    EX
  },
  'GRYDRA.gelu' => {
    description: 'GELU (Gaussian Error Linear Unit) activation function used in transformers.',
    example: <<~EX
      result = GRYDRA.gelu(0.5)
    EX
  },
  # Regularization
  'GRYDRA.apply_dropout' => {
    description: 'Applies dropout to outputs during training to prevent overfitting.',
    example: <<~EX
      dropout_outputs = GRYDRA.apply_dropout([0.5, 0.8, 0.3], 0.5, true)
    EX
  },
  'GRYDRA.l1_regularization' => {
    description: 'Calculates L1 penalty (sum of absolute values) for regularization.',
    example: <<~EX
      penalty = GRYDRA.l1_regularization([0.5, -0.3, 0.8], 0.01)
    EX
  },
  'GRYDRA.l2_regularization' => {
    description: 'Calculates L2 penalty (sum of squares) for regularization.',
    example: <<~EX
      penalty = GRYDRA.l2_regularization([0.5, -0.3, 0.8], 0.01)
    EX
  },
  # Advanced metrics
  'GRYDRA.confusion_matrix' => {
    description: 'Calculates the confusion matrix for binary classification problems.',
    example: <<~EX
      matrix = GRYDRA.confusion_matrix([0.8, 0.3, 0.9], [1, 0, 1], 0.5)
    EX
  },
  'GRYDRA.auc_roc' => {
    description: 'Calculates the area under the ROC curve for classifier evaluation.',
    example: <<~EX
      auc = GRYDRA.auc_roc([0.8, 0.3, 0.9, 0.1], [1, 0, 1, 0])
    EX
  },
  'GRYDRA.accuracy' => {
    description: 'Calculates the model\'s accuracy.',
    example: <<~EX
      acc = GRYDRA.accuracy([0.8, 0.3, 0.9], [1, 0, 1], 0.5)
    EX
  },
  # Cross-validation
  'GRYDRA.cross_validation' => {
    description: 'Performs k-fold cross-validation to robustly evaluate the model.',
    example: <<~EX
      result = GRYDRA.cross_validation(data_x, data_y, 5) do |train_x, train_y, test_x, test_y|
      # train and evaluate model
      error
      end
    EX
  },
  # Analysis and visualization
  'GRYDRA.analyze_gradients' => {
    description: 'Analyzes the model\'s gradients to detect vanishing/exploding gradient problems.',
    example: <<~EX
      analysis = GRYDRA.analyze_gradients(model)
    EX
  },
  'GRYDRA.plot_architecture_ascii' => {
    description: 'Displays an ASCII representation of the network architecture.',
    example: <<~EX
      GRYDRA.plot_architecture_ascii(model)
    EX
  },
  # Utilities
  'GRYDRA.split_data' => {
    description: 'Splits data into training and test sets randomly.',
    example: <<~EX
      split = GRYDRA.split_data(data_x, data_y, 0.8, 42)
    EX
  },
  'GRYDRA.hyperparameter_search' => {
    description: 'Performs hyperparameter search using grid search.',
    example: <<~EX
      grid = [{rate: 0.01, epochs: 1000}, {rate: 0.1, epochs: 500}]
      result = GRYDRA.hyperparameter_search(data_x, data_y, grid) do |params, x, y|
      # train with params and return error
      end
    EX
  },
  'GRYDRA.generate_synthetic_data' => {
    description: 'Generates synthetic data for testing and experimentation.',
    example: <<~EX
      data = GRYDRA.generate_synthetic_data(100, 3, 0.1, 42)
    EX
  },
  'GRYDRA.min_max_normalize' => {
    description: 'Normalizes data using Min-Max scaling to a specific range.',
    example: <<~EX
      data_norm = GRYDRA.min_max_normalize(data, 0, 1)
    EX
  },
  'GRYDRA.pca' => {
    description: 'Performs Principal Component Analysis (simplified version).',
    example: <<~EX
      result = GRYDRA.pca(data, 2)
    EX
  },
  # Advanced text processing
  'GRYDRA.create_advanced_vocabulary' => {
    description: 'Creates vocabulary with frequency filtering and size limit.',
    example: <<~EX
      vocab = GRYDRA.create_advanced_vocabulary(texts, 2, 1000)
    EX
  },
  'GRYDRA.vectorize_text_tfidf' => {
    description: 'Vectorizes text using TF-IDF instead of binary vectorization.',
    example: <<~EX
      vector = GRYDRA.vectorize_text_tfidf(text, vocabulary, corpus_freqs)
    EX
  },
  # Existing methods
  'GRYDRA.describe_method' => {
    description: 'Displays example of a class or method instance.',
    example: <<~EX
      GRYDRA.describe_method("GRYDRA", "save_model")
    EX
  },
  'GRYDRA.save_model' => {
    description: 'Saves the trained model to a binary file so it can be loaded later. Optionally saves the vocabulary as well.',
    example: <<~EX
      GRYDRA.save_model(model, "my_model", "./models", vocabulary)
    EX
  },
  'GRYDRA.load_model' => {
    description: 'Loads a saved model from a binary file to use it without retraining.',
    example: <<~EX
      model = GRYDRA.load_model("my_model", "./models")
    EX
  },
  'GRYDRA.save_vocabulary' => {
    description: 'Saves the vocabulary to a binary file for later loading.',
    example: <<~EX
      GRYDRA.save_vocabulary(vocabulary, "my_model", "./models")
    EX
  },
  'GRYDRA.load_vocabulary' => {
    description: 'Loads the vocabulary from a saved binary file.',
    example: <<~EX
      vocabulary = GRYDRA.load_vocabulary("my_model", "./models")
    EX
  },
  'GRYDRA.normalize_multiple' => {
    description: 'Normalizes a set of data according to the specified method (:max or :zscore).',
    example: <<~EX
      max_values = GRYDRA.calculate_max_values(data, :max)
      data_norm = GRYDRA.normalize_multiple(data, max_values, :max)
    EX
  },
  'GRYDRA.calculate_max_values' => {
    description: 'Calculates maximum values or means and standard deviations according to the method for normalizing data.',
    example: <<~EX
      max_values = GRYDRA.calculate_max_values(data, :max)
      statistics = GRYDRA.calculate_max_values(data, :zscore)
    EX
  },
  'GRYDRA.create_vocabulary' => {
    description: 'Creates a unique vocabulary from a list of texts, separating words.',
    example: <<~EX
      texts = ["hello world", "good day"]
      vocabulary = GRYDRA.create_vocabulary(texts)
    EX
  },
  'GRYDRA.vectorize_text' => {
    description: 'Converts a text into a binary vector based on the presence of words in the vocabulary.',
    example: <<~EX
      vector = GRYDRA.vectorize_text("hello world", vocabulary)
    EX
  },
  'GRYDRA.normalize_with_vocabulary' => {
    description: 'Normalizes vectors generated with the vocabulary by dividing by the vocabulary size.',
    example: <<~EX
      vectors_norm = GRYDRA.normalize_with_vocabulary(vectors, vocabulary)
    EX
  },
  'GRYDRA.generate_example' => {
    description: 'Generates a functional code example with the library, with examples from 1 to 9.',
    example: <<~EX
      GRYDRA.generate_example(1)
    EX
  },
  'GRYDRA.suggest_structure' => {
    description: 'Automatically suggests a possible neural network structure based on the number of inputs and outputs.',
    example: <<~EX
      suggested_structure = GRYDRA.suggest_structure(3, 1)
    EX
  },
  'GRYDRA.convert_hashes_to_vectors' => {
    description: 'Converts an array of hashes (like JSON) to numerical arrays for training.',
    example: <<~EX
      data = [
      { name: "A", age: 20, vip: true },
      { name: "B", age: 30, vip: false }
      ]
      data_vectors = GRYDRA.convert_hashes_to_vectors(data, [:age, :vip])
    EX
  },
  'GRYDRA.summary_model' => {
    description: 'Displays the subnets, their structures, and activation functions of a loaded model to the console.',
    example: <<~EX
      GRYDRA.summary_model(model)
    EX
  },
  'GRYDRA.validate_model' => {
    description: 'Checks if a "model" is actually a compatible model.',
    example: <<~EX
      GRYDRA.validate_model(model)
    EX
  },
  'GRYDRA.test_all_normalizations' => {
    description: 'Tests training with :max and :zscore and shows the final error with each one.',
    example: <<~EX
      inputs = [[1], [2], [3]]
      outputs = [[2], [4], [6]]
      structure = [[1, 3, 1]]
      GRYDRA.test_all_normalizations(inputs, outputs, structure)
    EX
  }
}.freeze
1221
- # Function to display description and example of a method given class and method (strings)
1222
# Prints the stored description and usage example for a documented method.
#
# The lookup key is "<class_name>.<method_name>" in METHOD_DESCRIPTIONS.
# When no entry exists, a coloured hint is printed instead.
#
# @param class_name [String] owner part of the key, e.g. "GRYDRA"
# @param method_name [String] method part of the key, e.g. "save_model"
# @return [nil]
def self.describe_method(class_name, method_name)
  lookup = "#{class_name}.#{method_name}"
  entry = METHOD_DESCRIPTIONS[lookup]

  unless entry
    puts "\e[31;1mNo description found for method '#{lookup}'",
         "\e[31mMake sure to use the exact class and method name (as strings)",
         "\e[36mYou can call the method to verify: list_methods_available\e[0m"
    return
  end

  puts "\e[1;3;5;37mDescription of #{lookup}:"
  puts entry[:description]
  puts "\nExample of use:"
  # The colour reset is appended after the example text.
  puts "#{entry[:example]}\e[0m"
end
1237
- # Function that lists all documented public methods in METHOD_DESCRIPTIONS
1238
# Prints every key of METHOD_DESCRIPTIONS, grouped by the part before the
# first dot (the owner) and listing the part after the last dot (the method).
#
# @return [nil]
def self.list_methods_available
  puts "\e[1;3;5;37mDocumented public methods:"

  by_owner = METHOD_DESCRIPTIONS.each_key.group_by { |qualified| qualified.split('.').first }
  by_owner.each_pair do |owner, qualified_names|
    puts " #{owner}:"
    qualified_names.each do |qualified|
      puts " - #{qualified.split('.').last}"
    end
  end

  # Reset terminal colours without emitting an extra newline.
  print "\e[0m"
end
1247
- def self.generate_example(num_example, filename = 'example', extension = 'rb', path = Dir.pwd)
1248
- case num_example
1249
- when 1
1250
- content = <<~RUBY
1251
- require 'grydra'
1252
- # Training data
1253
- training_data = [
1254
- { name: "Company 1", num_employees: 5, is_new: false, site: true, label: 0 },
1255
- { name: "Company 2", num_employees: 4, is_new: true, site: false, label: 0 },
1256
- { name: "Company 3", num_employees: 4, is_new: false, site: false, label: 1 },
1257
- { name: "Company 4", num_employees: 20, is_new: false, site: false, label: 1 },
1258
- { name: "Company 5", num_employees: 60, is_new: false, site: false, label: 1 },
1259
- { name: "Company 6", num_employees: 90, is_new: false, site: false, label: 0 },
1260
- { name: "Company 7", num_employees: 33, is_new: true, site: false, label: 0 },
1261
- { name: "Company 8", num_employees: 33, is_new: false, site: true, label: 0 },
1262
- { name: "Company 9", num_employees: 15, is_new: false, site: false, label: 1 },
1263
- { name: "Company 10", num_employees: 40, is_new: false, site: true, label: 0 },
1264
- { name: "Company 11", num_employees: 3, is_new: false, site: false, label: 0 },
1265
- { name: "Company 12", num_employees: 66, is_new: false, site: true, label: 0 },
1266
- { name: "Company 13", num_employees: 15, is_new: true, site: false, label: 0 },
1267
- { name: "Company 13", num_employees: 10, is_new: false, site: false, label: 1 },
1268
- { name: "Company 13", num_employees: 33, is_new: false, site: false, label: 1 },
1269
- { name: "Company 13", num_employees: 8, is_new: false, site: false, label: 1 },
1270
- ]
1271
- # Create the model with regularization and dropout
1272
- model = GRYDRA::EasyNetwork.new(true, true)
1273
- # Configure Adam optimizer
1274
- model.configure_adam_optimizer(0.001, 0.9, 0.999)
1275
- # Train with L2 regularization and dropout
1276
- model.train_hashes(
1277
- training_data,
1278
- [:num_employees, :is_new, :site],
1279
- :label,
1280
- [[3, 4, 1]],
1281
- 0.05,
1282
- 12000,
1283
- :max,
1284
- lambda_l2: 0.001,
1285
- dropout: true,
1286
- dropout_rate: 0.3
1287
- )
1288
- # Save the trained model
1289
- GRYDRA.save_model(model, "company_model_advanced")
1290
- puts "Training completed with regularization and Adam optimizer."
1291
- RUBY
1292
- when 10
1293
- content = <<~RUBY
1294
- require 'grydra'
1295
- # Example of cross-validation with hyperparameter search
1296
- puts "Example of Cross-Validation and Hyperparameter Search"
1297
- # Generate synthetic data
1298
- synthetic_data = GRYDRA.generate_synthetic_data(200, 3, 0.1, 42)
1299
- data_x = synthetic_data[:data]
1300
- data_y = synthetic_data[:labels]
1301
- # Define hyperparameter grid
1302
- param_grid = [
1303
- { rate: 0.01, epochs: 1000, lambda_l2: 0.001 },
1304
- { rate: 0.05, epochs: 800, lambda_l2: 0.01 },
1305
- { rate: 0.1, epochs: 500, lambda_l2: 0.001 },
1306
- { rate: 0.02, epochs: 1200, lambda_l2: 0.005 }
1307
- ]
1308
- # Hyperparameter search with cross-validation
1309
- best_result = GRYDRA.hyperparameter_search(data_x, data_y, param_grid) do |params, x, y|
1310
- # Cross-validation for each configuration
1311
- result_cv = GRYDRA.cross_validation(x, y, 5) do |train_x, train_y, test_x, test_y|
1312
- # Create and train model
1313
- model = GRYDRA::EasyNetwork.new(false)
1314
- model.configure_adam_optimizer(params[:rate])
1315
- #{' '}
1316
- model.train_numerical(
1317
- train_x, train_y, [[4, 3, 1]],#{' '}
1318
- params[:rate], params[:epochs], :max,
1319
- lambda_l2: params[:lambda_l2],
1320
- patience: 50
1321
- )
1322
- #{' '}
1323
- # Evaluate on test set
1324
- predictions = model.predict_numerical(test_x)
1325
- GRYDRA.mse(predictions.flatten, test_y.flatten)
1326
- end
1327
- #{' '}
1328
- result_cv[:average]
1329
- end
1330
- puts "\
1331
- Best configuration found:"
1332
- puts "Parameters: \#{best_result[:parameters]}"
1333
- puts "Average CV Error: \#{best_result[:score].round(6)}"
1334
- # Train final model with best parameters
1335
- puts "\
1336
- Training final model..."
1337
- final_model = GRYDRA::EasyNetwork.new(true)
1338
- final_model.configure_adam_optimizer(best_result[:parameters][:rate])
1339
- # Split data for final training
1340
- split = GRYDRA.split_data(data_x, data_y, 0.8, 42)
1341
- final_model.train_numerical(
1342
- split[:train_x], split[:train_y], [[4, 3, 1]],
1343
- best_result[:parameters][:rate],#{' '}
1344
- best_result[:parameters][:epochs],#{' '}
1345
- :max,
1346
- lambda_l2: best_result[:parameters][:lambda_l2]
1347
- )
1348
- # Evaluate final model
1349
- evaluation = final_model.evaluate_model(split[:test_x], split[:test_y], [:mse, :mae])
1350
- puts "\
1351
- Evaluation of final model:"
1352
- puts "MSE: \#{evaluation[:mse].round(6)}"
1353
- puts "MAE: \#{evaluation[:mae].round(6)}"
1354
- # Analyze gradients
1355
- analysis = GRYDRA.analyze_gradients(final_model.network)
1356
- puts "\
1357
- Gradient analysis:"
1358
- puts "Average: \#{analysis[:average].round(6)}"
1359
- puts "Maximum: \#{analysis[:maximum].round(6)}"
1360
- puts "Minimum: \#{analysis[:minimum].round(6)}"
1361
- # Show architecture
1362
- GRYDRA.plot_architecture_ascii(final_model.network)
1363
- RUBY
1364
- when 11
1365
- content = <<~RUBY
1366
- require 'grydra'
1367
- # Example of advanced text processing with TF-IDF
1368
- puts " Example of Advanced Text Processing"
1369
- # Example corpus
1370
- texts = [
1371
- "the cat climbed the tree",
1372
- "the dog ran through the park",
1373
- "birds fly high",
1374
- "the cat and the dog are friends",
1375
- "the trees in the park are tall",
1376
- "the dog barks at the cat",
1377
- "birds sing in the trees"
1378
- ]
1379
- # Labels (0: animals, 1: nature)
1380
- labels = [[0], [0], [1], [0], [1], [0], [1]]
1381
- # Create advanced vocabulary with filtering
1382
- vocabulary = GRYDRA.create_advanced_vocabulary(texts, 2, 50)
1383
- puts "Vocabulary created: \#{vocabulary.size} words"
1384
- puts "Words: \#{vocabulary.join(', ')}"
1385
- # Calculate corpus frequencies for TF-IDF
1386
- corpus_freqs = Hash.new(0)
1387
- texts.each do |text|
1388
- text.split.uniq.each { |word| corpus_freqs[word] += 1 }
1389
- end
1390
- # Vectorize texts using TF-IDF
1391
- vectors_tfidf = texts.map do |text|
1392
- GRYDRA.vectorize_text_tfidf(text, vocabulary, corpus_freqs)
1393
- end
1394
- puts "\
1395
- TF-IDF vectorization completed"
1396
- puts "Vector dimension: \#{vectors_tfidf.first.size}"
1397
- # Train model with TF-IDF vectors
1398
- model = GRYDRA::EasyNetwork.new(true)
1399
- model.configure_adam_optimizer(0.01)
1400
- # Normalize TF-IDF vectors
1401
- max_values = GRYDRA.calculate_max_values(vectors_tfidf, :max)
1402
- vectors_norm = GRYDRA.normalize_multiple(vectors_tfidf, max_values, :max)
1403
- # Train
1404
- model.train_numerical(
1405
- vectors_norm, labels, [[8, 4, 1]],#{' '}
1406
- 0.01, 2000, :max,
1407
- lambda_l1: 0.001,
1408
- dropout: true,
1409
- dropout_rate: 0.2
1410
- )
1411
- # Test with new texts
1412
- new_texts = [
1413
- "the cat sleeps",
1414
- "the trees are green",
1415
- "the dog plays"
1416
- ]
1417
- puts "\
1418
- Predictions for new texts:"
1419
- new_texts.each do |text|
1420
- vector_tfidf = GRYDRA.vectorize_text_tfidf(text, vocabulary, corpus_freqs)
1421
- vector_norm = GRYDRA.normalize_multiple([vector_tfidf], max_values, :max)
1422
- prediction = model.predict_numerical(vector_norm)
1423
- #{' '}
1424
- category = prediction[0][0] > 0.5 ? "Nature" : "Animals"
1425
- puts "'\#{text}' → \#{prediction[0][0].round(3)} (\#{category})"
1426
- end
1427
- # Show model analysis
1428
- puts "\
1429
- Model analysis:"
1430
- evaluation = model.evaluate_model(vectors_norm, labels, [:mse, :mae])
1431
- puts "MSE: \#{evaluation[:mse].round(6)}"
1432
- puts "MAE: \#{evaluation[:mae].round(6)}"
1433
- GRYDRA.plot_architecture_ascii(model.network)
1434
- RUBY
1435
- when 12
1436
- content = <<~RUBY
1437
- require 'grydra'
1438
- # Example of advanced classification metrics
1439
- puts " Example of Advanced Classification Metrics"
1440
- # Generate binary classification data
1441
- data_x = [
1442
- [0.1, 0.2], [0.8, 0.9], [0.2, 0.1], [0.9, 0.8],
1443
- [0.3, 0.4], [0.7, 0.6], [0.4, 0.3], [0.6, 0.7],
1444
- [0.15, 0.25], [0.85, 0.75], [0.25, 0.15], [0.75, 0.85]
1445
- ]
1446
- data_y = [[0], [1], [0], [1], [0], [1], [0], [1], [0], [1], [0], [1]]
1447
- # Split data
1448
- split = GRYDRA.split_data(data_x, data_y, 0.7, 42)
1449
- # Train model
1450
- model = GRYDRA::EasyNetwork.new(true)
1451
- model.configure_adam_optimizer(0.1)
1452
- model.train_numerical(
1453
- split[:train_x], split[:train_y], [[3, 1]],#{' '}
1454
- 0.1, 1000, :max,
1455
- lambda_l2: 0.01
1456
- )
1457
- # Make predictions
1458
- predictions = model.predict_numerical(split[:test_x])
1459
- pred_flat = predictions.flatten
1460
- actual_flat = split[:test_y].flatten
1461
- puts "\
1462
- Predictions vs Actuals:"
1463
- predictions.zip(split[:test_y]).each_with_index do |(pred, actual), i|
1464
- puts "Sample \#{i+1}: Pred=\#{pred[0].round(3)}, Actual=\#{actual[0]}"
1465
- end
1466
- # Calculate advanced metrics
1467
- puts "\
1468
- Evaluation Metrics:"
1469
- # Accuracy
1470
- accuracy = GRYDRA.accuracy(pred_flat, actual_flat, 0.5)
1471
- puts "Accuracy: \#{(accuracy * 100).round(2)}%"
1472
- # Confusion matrix
1473
- matrix = GRYDRA.confusion_matrix(pred_flat, actual_flat, 0.5)
1474
- puts "\
1475
- Confusion Matrix:"
1476
- puts " TP: \#{matrix[:tp]}, FP: \#{matrix[:fp]}"
1477
- puts " TN: \#{matrix[:tn]}, FN: \#{matrix[:fn]}"
1478
- # Precision, Recall, F1
1479
- if matrix[:tp] + matrix[:fp] > 0
1480
- precision = GRYDRA.precision(matrix[:tp], matrix[:fp])
1481
- puts "Precision: \#{(precision * 100).round(2)}%"
1482
- end
1483
- if matrix[:tp] + matrix[:fn] > 0
1484
- recall = GRYDRA.recall(matrix[:tp], matrix[:fn])
1485
- puts "Recall: \#{(recall * 100).round(2)}%"
1486
- #{' '}
1487
- if defined?(precision) && precision > 0 && recall > 0
1488
- f1 = GRYDRA.f1(precision, recall)
1489
- puts "F1-Score: \#{(f1 * 100).round(2)}%"
1490
- end
1491
- end
1492
- # AUC-ROC
1493
- auc = GRYDRA.auc_roc(pred_flat, actual_flat)
1494
- puts "AUC-ROC: \#{auc.round(4)}"
1495
- # MSE and MAE
1496
- mse = GRYDRA.mse(pred_flat, actual_flat)
1497
- mae = GRYDRA.mae(pred_flat, actual_flat)
1498
- puts "\
1499
- Regression Metrics:"
1500
- puts "MSE: \#{mse.round(6)}"
1501
- puts "MAE: \#{mae.round(6)}"
1502
- # Gradient analysis
1503
- puts "\
1504
- Gradient Analysis:"
1505
- analysis = GRYDRA.analyze_gradients(model.network)
1506
- puts "Average: \#{analysis[:average].round(6)}"
1507
- puts "Deviation: \#{analysis[:deviation].round(6)}"
1508
- puts "Range: [\#{analysis[:minimum].round(6)}, \#{analysis[:maximum].round(6)}]"
1509
- # Show architecture
1510
- GRYDRA.plot_architecture_ascii(model.network)
1511
- RUBY
1512
- else
1513
- content = case num_example
1514
- when 2
1515
- <<~RUBY
1516
- require 'grydra'
1517
- model = nil
1518
- model = GRYDRA.load_model("company_model_advanced")
1519
- # New company data to evaluate
1520
- new_data = [
1521
- { name: "New Company A", num_employees: 12, is_new: true, site: true },
1522
- { name: "New Company B", num_employees: 50, is_new: false, site: false },
1523
- { name: "New Company C", num_employees: 7, is_new: false, site: false },
1524
- { name: "New Company D", num_employees: 22, is_new: true, site: true }
1525
- ]
1526
- # Make predictions
1527
- predictions = model.predict_hashes(new_data, [:num_employees, :is_new, :site])
1528
- # Show results
1529
- new_data.each_with_index do |company, i|
1530
- prediction = predictions[i].first.round(3)
1531
- puts "Company: \#{company[:name]} → Prediction: \#{prediction} (\#{prediction >= 0.5 ? 'Label 1 (Yes)' : 'Label 0 (No)'})"
1532
- end
1533
- RUBY
1534
- when 3
1535
- <<~RUBY
1536
- require 'grydra'
1537
- # Create main network
1538
- network = GRYDRA::MainNetwork.new #No parameters, just don't print epochs or plot
1539
- # Add a subnet with structure [2 inputs, 2 hidden, 1 output]
1540
- network.add_subnet([2, 3, 1], [:tanh, :tanh])
1541
- network.add_subnet([2, 4, 1], [:sigmoid, :sigmoid])
1542
- # XOR data
1543
- inputs = [
1544
- [0, 0],
1545
- [0, 1],
1546
- [1, 0],
1547
- [1, 1]
1548
- ]
1549
- outputs = [
1550
- [0],
1551
- [1],
1552
- [1],
1553
- [0]
1554
- ]
1555
- # Training
1556
- epochs = 6000
1557
- learning_rate = 0.9
1558
- network.train_subnets(
1559
- [
1560
- {input: inputs, output: outputs},
1561
- {input: inputs, output: outputs},
1562
- ],
1563
- learning_rate,#{' '}
1564
- epochs,
1565
- batch_size: 1, #Number of data points to train simultaneously
1566
- patience: 100, #If results don't improve for (n) epochs, then stop#{' '}
1567
- decay: 0.995 #Number by which it multiplies#{' '}
1568
- )
1569
- # Evaluation
1570
- puts "\
1571
- XOR Evaluation:"
1572
- inputs.each do |input|
1573
- output = network.combine_results(input)
1574
- puts "Input: \#{input} => Output: \#{output.map { |v| v.round(3) }}" #>0.5 in this case would be 1#{' '}
1575
- end
1576
- RUBY
1577
- when 4
1578
- <<~RUBY
1579
- require 'grydra'
1580
- # Original data (Celsius and Fahrenheit temperatures)
1581
- data_in = [[0], [10], [20], [30], [40], [50], [-10], [-20], [100], [-30], [-5], [-40]]
1582
- data_out = [[32], [50], [68], [86], [104], [122], [14], [-4], [212], [-22], [23], [-40]]
1583
- # Find maximum values to normalize (:max method)
1584
- max_in = GRYDRA.calculate_max_values(data_in, :max) # {0 => value}
1585
- max_out = GRYDRA.calculate_max_values(data_out, :max) # {0 => value}
1586
- # Normalize data
1587
- data_in_no = GRYDRA.normalize_multiple(data_in, max_in, :max)
1588
- data_out_no = GRYDRA.normalize_multiple(data_out, max_out, :max)
1589
- # Create main network
1590
- main_network = GRYDRA::MainNetwork.new(false, true) #Doesn't print errors, but plots
1591
- # Add subnets
1592
- main_network.add_subnet([1, 4, 1], [:sigmoid, :tanh])
1593
- main_network.add_subnet([1, 3, 1], [:relu, :tanh])
1594
- main_network.add_subnet([1, 2, 1], [:tanh, :tanh])
1595
- puts "Training subnets..."
1596
- main_network.train_subnets(
1597
- [
1598
- { input: data_in_no, output: data_out_no },
1599
- { input: data_in_no, output: data_out_no },
1600
- { input: data_in_no, output: data_out_no }
1601
- ],
1602
- 0.2, # learning rate
1603
- 25000, # epochs
1604
- batch_size: 5,
1605
- patience: 500,
1606
- decay: 0.995
1607
- )
1608
- puts "\
1609
- Enter Celsius degrees separated by space:"
1610
- print "<< "
1611
- user_input = gets.chomp.split.map(&:to_f)
1612
- # Normalize inputs
1613
- user_input_no = user_input.map { |e| [e] }
1614
- user_input_no = GRYDRA.normalize_multiple(user_input_no, max_in, :max)
1615
- puts "\
1616
- Combined results:"
1617
- user_input_no.each_with_index do |input_norm, i|
1618
- prediction_norm = main_network.combine_results(input_norm)
1619
- prediction = [prediction_norm[0] * max_out[0]]#{' '}
1620
- puts "\#{user_input[i]} °C : \#{prediction[0].round(2)} °F"
1621
- end
1622
- RUBY
1623
- when 5
1624
- <<~RUBY
1625
- require 'grydra'
1626
- =begin
1627
- IMPORTANT NOTE:
1628
- If using zscore normalization, we must be more meticulous and it's recommended#{' '}
1629
- to constantly lower and raise the learning rate, because with zscore once it reaches#{' '}
1630
- the expected results it will stay around that margin. The rate can be lowered and raised#{' '}
1631
- as much as desired, for example:
1632
- 0.003, 0.3, 0.9, 0.2, 0.223, 0.00008
1633
- =end
1634
- # Create instance of EasyNetwork class
1635
- network = GRYDRA::EasyNetwork.new(true, true) # true to print epochs and another true to plot
1636
- # Original data (Celsius and Fahrenheit temperatures)
1637
- data_in = [[0], [10], [20], [30], [40], [50], [-10], [-20], [100], [-30], [-5], [-40]]
1638
- data_out = [[32], [50], [68], [86], [104], [122], [14], [-4], [212], [-22], [23], [-40]]
1639
- # Define subnet structures (hidden layers)
1640
- structures = [
1641
- [2, 4, 1], # hidden layer with 4 neurons, 1 output
1642
- [1, 3, 1], # another subnet, 3 hidden neurons
1643
- [2, 7, 1] # and another smaller one
1644
- ]
1645
- puts "Training network..."
1646
- network.train_numerical(data_in, data_out, structures, 0.5, 30000, :zscore)#{' '}
1647
- #network.train_numerical(data_in, data_out, structures, 0.5, 30000, :max)
1648
- puts "\
1649
- Enter Celsius degrees separated by space:"
1650
- print "<< "
1651
- user_input = gets.chomp.split.map(&:to_f).map { |v| [v] }
1652
- predictions = network.predict_numerical(user_input, :zscore)
1653
- #predictions = network.predict_numerical(user_input, :max)#{' '}
1654
- puts "\
1655
- Results:"
1656
- user_input.each_with_index do |input, i|
1657
- f = predictions[i][0]
1658
- puts "\#{input[0]} °C : \#{f.round(2)} °F"
1659
- end
1660
- RUBY
1661
- when 6
1662
- <<~RUBY
1663
- require 'grydra'
1664
- # Create input and output data
1665
- data_input = [
1666
- [170, 25],
1667
- [160, 30],
1668
- [180, 22],
1669
- [150, 28],
1670
- [175, 24]
1671
- ]
1672
- # Weight corresponding to each person (kg)
1673
- data_output = [
1674
- [65],
1675
- [60],
1676
- [75],
1677
- [55],
1678
- [70]
1679
- ]
1680
- # Define subnet structures
1681
- # Each subnet has: 2 inputs → 3 hidden neurons → 1 output
1682
- structures = [
1683
- [3, 1],
1684
- [4, 1]
1685
- ]
1686
- # Create network using the easy interface
1687
- network = GRYDRA::EasyNetwork.new(true) # true to print only error per epoch
1688
- # Train the network
1689
- network.train_numerical(
1690
- data_input,
1691
- data_output,
1692
- structures,
1693
- 0.05, # learning rate
1694
- 15000, # epochs
1695
- :max # normalization type
1696
- )
1697
- # Predict for a new individual
1698
- new_data = [[172, 26]]
1699
- predictions = network.predict_numerical(new_data, :max)
1700
- puts "\
1701
- Result:"
1702
- puts "Height: \#{new_data[0][0]}, Age: \#{new_data[0][1]} ⇒ Estimated weight: \#{predictions[0][0].round(2)} kg"
1703
- RUBY
1704
- when 7
1705
- <<~RUBY
1706
- require 'grydra'
1707
- # Training data: [height (cm), age (years)]
1708
- data_input = [
1709
- [170, 25],
1710
- [160, 30],
1711
- [180, 22],
1712
- [175, 28],
1713
- [165, 35],
1714
- [155, 40],
1715
- [185, 20]
1716
- ]
1717
- # Real weight in kg
1718
- data_output = [
1719
- [65],
1720
- [60],
1721
- [75],
1722
- [70],
1723
- [62],
1724
- [58],
1725
- [80]
1726
- ]
1727
- # Structures for subnets: hidden layers and output
1728
- structures = [
1729
- [4, 1],#{' '}
1730
- [3, 1],
1731
- [6, 1],
1732
- [2, 1]
1733
- ]
1734
- # Create Easy Network
1735
- network = GRYDRA::EasyNetwork.new(true, true) # true to see training progress and another true to plot
1736
- # We will assign 'sigmoid' activation to the last layer to limit output between 0 and 1
1737
- # Adjust internally in the add_subnet method
1738
- structures.each do |structure|
1739
- # Define activations: hidden with :tanh, output with :sigmoid
1740
- activations = Array.new(structure.size - 1, :tanh) + [:sigmoid]
1741
- network.network.add_subnet([data_input.first.size, *structure], activations)
1742
- end
1743
- # :max normalization (easy to denormalize)
1744
- network.train_numerical(data_input, data_output, structures, 0.01, 10000, :max)
1745
- # New sample to predict: height=172 cm, age=26 years
1746
- new_data = [[172, 26]]
1747
- # Make prediction (normalized internally)
1748
- predictions = network.predict_numerical(new_data, :max)
1749
- # Predictions are already denormalized by EasyNetwork, just round them
1750
- puts "Predicted weight (kg) for height \#{new_data[0][0]} cm and age \#{new_data[0][1]} years:"
1751
- print predictions.map { |p| p[0].round(2) }
1752
- GRYDRA.save_model(network, "average_weight")
1753
- RUBY
1754
- when 8
1755
- <<~RUBY
1756
- # predict.rb
1757
- require 'grydra'
1758
- # Load the previously saved model
1759
- model = GRYDRA.load_model("average_weight")
1760
- # Input data to predict: height=172 cm, age=26 years
1761
- new_data = [[172, 26]]
1762
- # Normalization used (can be :max or :zscore, depending on training)
1763
- normalization = :max
1764
- # Make prediction
1765
- predictions = model.predict_numerical(new_data, normalization)
1766
- puts "Predicted weight (kg) for height \#{new_data[0][0]} cm and age \#{new_data[0][1]} years:"
1767
- puts predictions.map { |p| p[0].round(2) }
1768
- RUBY
1769
- when 9
1770
- <<~RUBY
1771
- #Program to determine product price
1772
- require 'grydra'
1773
- #product cost will be in dollars
1774
- data_input = [
1775
- [10, 0], #<-- Number of products per company and (0 and 1) if is vip or not#{' '}
1776
- [15, 0],
1777
- [8, 1],
1778
- [20, 1],
1779
- [12, 0],
1780
- [30, 1],
1781
- [25, 1],
1782
- [5, 0],
1783
- [18, 0],
1784
- [40, 1]
1785
- ]
1786
- #Price in dollars
1787
- data_output = [
1788
- [20],
1789
- [28],
1790
- [25],
1791
- [45],
1792
- [22],
1793
- [60],
1794
- [50],
1795
- [12],
1796
- [32],
1797
- [80]
1798
- ]
1799
- #Data normalization (common data --> Vector data)
1800
- max_in = GRYDRA.calculate_max_values(data_input, :max) #We will use max normalization, although zscore is possible
1801
- max_out = GRYDRA.calculate_max_values(data_output, :max)
1802
- data_in_no = GRYDRA.normalize_multiple(data_input, max_in, :max) #By default uses max, so :max is optional
1803
- data_out_no = GRYDRA.normalize_multiple(data_output, max_out)
1804
- #Create the network
1805
- network = GRYDRA::MainNetwork.new
1806
- #We need to add subnets to our network
1807
- network.add_subnet([2, 4, 1], [:relu, :tanh])#{' '}
1808
- network.add_subnet([2, 3, 1], [:tanh, :tanh])#{' '}
1809
- puts "Training subnets"
1810
- network.train_subnets(
1811
- [
1812
- {input: data_in_no, output: data_out_no},
1813
- {input: data_in_no, output: data_out_no}
1814
- ],
1815
- 0.2, #Learning rate
1816
- 20000, #Number of epochs
1817
- batch_size: 3, #Means it will analyze 3 data points at a time
1818
- patience: 500, #If results don't improve in these epochs then this network stops
1819
- decay: 0.995 #number by which it multiplies
1820
- )
1821
- puts "Enter new values to predict the product price eg: (12 0)"
1822
- values = gets.chomp.strip.split.map(&:to_f)
1823
- input_norm = GRYDRA.normalize_multiple([values], max_in, :max)[0]
1824
- prediction = network.combine_results(input_norm)
1825
- prediction_denorm = prediction[0] * max_out[0]
1826
- puts "Approximate price in dollars is $#{prediction_denorm.round(2)}"
1827
- RUBY
1828
- else
1829
- puts "\e[1;35mPossible examples are from 1 to 12\e[0m"
1830
- return
1831
- end
1832
- end
1833
- return unless num_example.between?(1, 12)
1834
- File.write(File.join(path, "#{filename}.#{extension}"), content)
1835
- puts "Example generated and saved to \e[33m#{File.join(path, filename)}\e[0m"
1836
- end
1837
# Proposes one default layer layout for a network.
#
# The layout is: the input size, a hidden layer twice as wide as
# (inputs + outputs), a hidden layer exactly (inputs + outputs) wide, and
# finally the output size. If both hidden widths are equal they are collapsed
# into a single layer.
#
# @param inputs [Integer] number of input values
# @param outputs [Integer] number of output values (defaults to 1)
# @return [Array<Array<Integer>>] a one-element list holding the layout
def self.suggest_structure(inputs, outputs = 1)
  combined = inputs + outputs
  layout = [inputs]
  layout.concat([combined * 2, combined].uniq)
  layout << outputs
  [layout]
end
1841
# Prints a textual bar chart of the training error per epoch.
#
# A row is printed for the first epoch, the last epoch and every
# +print_every+-th epoch. A row (other than the last one) is skipped when its
# error differs from the previously printed error by less than +delta_min+.
# Each row shows a bar scaled against the largest error and the relative
# improvement with respect to the first error.
#
# Degenerate inputs are handled explicitly:
# * an empty (or nil) error list prints nothing and returns nil;
# * when the largest error is zero the bar falls back to its minimum length
#   instead of dividing by zero;
# * when the first error is zero the improvement is reported as 0.0% instead
#   of NaN/Infinity.
#
# @param errors [Array<Numeric>] error value recorded for each epoch
# @param print_every [Integer] print one row every this many epochs
# @param bar_width [Integer] width in characters of a full-length bar
# @param delta_min [Numeric] minimum change required to print a new row
# @return [nil]
def self.plot_error(errors, print_every = 5, bar_width = 40, delta_min = 0.001)
  return if errors.nil? || errors.empty?

  max_error = errors.max
  first_error = errors.first
  final_epoch = errors.size
  rule = '-' * (bar_width + 40)

  puts "\nError graph by epoch"
  puts rule
  last_printed = nil
  errors.each_with_index do |error, i|
    epoch = i + 1
    next unless epoch == 1 || epoch == final_epoch || (epoch % print_every).zero?
    # Skip rows that barely changed, but always show the final epoch.
    next if last_printed && (last_printed - error).abs < delta_min && epoch != final_epoch

    # Guard the scaling: with an all-zero error list there is nothing to scale.
    scaled = max_error.zero? ? 0 : (bar_width * error / max_error).round
    bar = '=' * [scaled, 1].max

    # Relative improvement is undefined when the starting error is zero.
    improvement_pct = first_error.zero? ? 0.0 : ((first_error - error) / first_error.to_f) * 100
    improvement_str = improvement_pct >= 0 ? "+#{improvement_pct.round(2)}%" : "#{improvement_pct.round(2)}%"

    puts "Epoch #{epoch.to_s.ljust(4)} | #{bar.ljust(bar_width)} | Error: #{error.round(6)} | Improvement: #{improvement_str}"
    last_printed = error
  end
  puts rule
  puts "Initial error: #{first_error.round(6)}, Final error: #{errors.last.round(6)}\n"
end
1866
# Converts a list of hashes into a list of numeric vectors.
#
# For every hash, the values stored under +keys+ are read in order and mapped
# to floats: +true+ becomes 1.0, +false+ and missing/nil values become 0.0,
# and anything else is converted with +to_f+.
#
# @param array_hashes [Array<Hash>] records to convert
# @param keys [Array] keys to extract, in the order of the resulting vector
# @return [Array<Array<Float>>] one vector per input hash
def self.convert_hashes_to_vectors(array_hashes, keys)
  array_hashes.map do |hash|
    keys.map do |key|
      value = hash[key]
      case value
      when nil, false then 0.0
      when true then 1.0
      else value.to_f
      end
    end
  end
end
1881
# Prints a human-readable summary of a model.
#
# When the model (or the object returned by its +network+ method) exposes
# +subnets+, each subnet is listed with its layer sizes, the activations of
# all layers but the last, and the activation of the last layer. If
# +input_test+ is given, each subnet's output for that input is printed too;
# any StandardError raised while computing it is reported instead of
# propagated. Objects without +subnets+ are passed to GRYDRA.validate_model.
#
# @param model [Object] a network, or a wrapper exposing one via +network+
# @param input_test [Object, nil] optional input forwarded to each subnet's
#   +calculate_outputs+
# @return [Object] the subnet list, or the result of GRYDRA.validate_model
def self.summary_model(model, input_test = nil)
  puts "\n\e[1;36mModel summary:\e[0m"

  # Unwrap wrappers (such as EasyNetwork) that hold the real network.
  model = model.network if model.respond_to?(:network) && model.network.respond_to?(:subnets)

  return GRYDRA.validate_model(model) unless model.respond_to?(:subnets)

  model.subnets.each_with_index do |subnet, index|
    puts "\nSubnet ##{index + 1}:"

    layers = subnet.layers
    sizes = layers.map { |layer| layer.neurons.size }
    inner_activations = layers[0...-1].map { |layer| layer.activation }
    final_activation = layers.last.activation

    puts " - Structure: #{sizes.inspect}"
    puts " - Hidden activations: #{inner_activations.inspect}"
    puts " - Output function: #{final_activation.inspect}"

    if input_test
      begin
        result = subnet.calculate_outputs(input_test)
        puts " - Numerical output with input #{input_test.inspect}: #{result.inspect}"
      rescue StandardError => e
        puts " - Error calculating output: #{e.message}"
      end
    end
  end
end
1908
# Trains a fresh GRYDRA::EasyNetwork once per normalization type (:max and
# :zscore) and prints whatever +train_numerical+ returns for each run.
#
# Every run uses a learning rate of 0.1 and 5000 epochs, and the network is
# created with +false+ as its only constructor argument.
#
# @param inputs [Array] training inputs forwarded to +train_numerical+
# @param outputs [Array] training outputs forwarded to +train_numerical+
# @param structures [Array] subnet structures forwarded to +train_numerical+
# @return [Array<Symbol>] the list of normalization types that were tried
def self.test_all_normalizations(inputs, outputs, structures)
  [:max, :zscore].each do |normalization|
    puts "\nTesting normalization: #{normalization}"
    trainer = GRYDRA::EasyNetwork.new(false)
    outcome = trainer.train_numerical(inputs, outputs, structures, 0.1, 5000, normalization)
    puts " Final error: #{outcome}"
  end
end
1917
# Prints a colored one-line diagnosis of +model+:
# * red error when the model is nil;
# * green confirmation when it is a GRYDRA::EasyNetwork or
#   GRYDRA::MainNetwork (subclasses included);
# * yellow warning, naming the actual class, for anything else.
#
# @param model [Object, nil] the object to inspect
# @return [nil]
def self.validate_model(model)
  case model
  when nil
    puts "\e[31mError: model is nil\e[0m"
  when GRYDRA::EasyNetwork, GRYDRA::MainNetwork
    puts "\e[32mValid model of type #{model.class}\e[0m"
  else
    puts "\e[33mWarning: The loaded model is not a known instance (#{model.class})\e[0m"
  end
end
1926
- end