grydra 1.0.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENCE +34 -0
- data/README.md +1381 -80
- data/lib/grydra/activations.rb +70 -0
- data/lib/grydra/callbacks.rb +212 -0
- data/lib/grydra/documentation.rb +84 -0
- data/lib/grydra/initializers.rb +14 -0
- data/lib/grydra/layers/base.rb +10 -0
- data/lib/grydra/layers/conv.rb +106 -0
- data/lib/grydra/layers/dense.rb +17 -0
- data/lib/grydra/layers/lstm.rb +139 -0
- data/lib/grydra/losses.rb +119 -0
- data/lib/grydra/metrics.rb +75 -0
- data/lib/grydra/networks/easy_network.rb +161 -0
- data/lib/grydra/networks/main_network.rb +38 -0
- data/lib/grydra/networks/neural_network.rb +203 -0
- data/lib/grydra/networks/neuron.rb +80 -0
- data/lib/grydra/normalization.rb +77 -0
- data/lib/grydra/optimizers.rb +162 -0
- data/lib/grydra/preprocessing/data.rb +48 -0
- data/lib/grydra/preprocessing/pca.rb +132 -0
- data/lib/grydra/preprocessing/text.rb +62 -0
- data/lib/grydra/regularization.rb +19 -0
- data/lib/grydra/training/cross_validation.rb +35 -0
- data/lib/grydra/training/hyperparameter_search.rb +46 -0
- data/lib/grydra/utils/examples.rb +183 -0
- data/lib/grydra/utils/persistence.rb +94 -0
- data/lib/grydra/utils/visualization.rb +105 -0
- data/lib/grydra/version.rb +3 -0
- data/lib/grydra.rb +162 -2
- metadata +96 -17
- data/lib/gr/core.rb +0 -1926
- data/lib/gr/version.rb +0 -3
data/lib/grydra/normalization.rb

@@ -0,0 +1,77 @@
+module GRYDRA
+  module Normalization
+    # Z-score normalization
+    def self.zscore_normalize(data)
+      n = data.size
+      means = data.first.size.times.map { |i| data.map { |row| row[i] }.sum.to_f / n }
+      std_devs = data.first.size.times.map do |i|
+        m = means[i]
+        Math.sqrt(data.map { |row| (row[i] - m)**2 }.sum.to_f / n)
+      end
+      normalized = data.map do |row|
+        row.each_with_index.map { |value, i| std_devs[i] != 0 ? (value - means[i]) / std_devs[i] : 0 }
+      end
+      [normalized, means, std_devs]
+    end
+
+    def self.zscore_denormalize(normalized, means, std_devs)
+      normalized.map do |row|
+        row.each_with_index.map { |value, i| value * std_devs[i] + means[i] }
+      end
+    end
+
+    # Min-Max normalization
+    def self.min_max_normalize(data, min_val = 0, max_val = 1)
+      data_min = data.flatten.min
+      data_max = data.flatten.max
+      range = data_max - data_min
+      return data if range == 0
+
+      data.map do |row|
+        row.map { |v| min_val + (v - data_min) * (max_val - min_val) / range }
+      end
+    end
+
+    # Generic normalization with multiple methods
+    def self.normalize_multiple(data, max_values, method = :max)
+      case method
+      when :max
+        data.map do |row|
+          row.each_with_index.map { |value, idx| value.to_f / max_values[idx] }
+        end
+      when :zscore
+        means = max_values[:means]
+        std_devs = max_values[:std_devs]
+        data.map do |row|
+          row.each_with_index.map do |value, idx|
+            std_devs[idx] != 0 ? (value.to_f - means[idx]) / std_devs[idx] : 0
+          end
+        end
+      else
+        raise ArgumentError, "Unknown normalization method: #{method}"
+      end
+    end
+
+    def self.calculate_max_values(data, method = :max)
+      if method == :max
+        max_values = {}
+        data.first.size.times do |i|
+          max_values[i] = data.map { |row| row[i] }.max.to_f
+        end
+        max_values
+      elsif method == :zscore
+        n = data.size
+        means = data.first.size.times.map do |i|
+          data.map { |row| row[i] }.sum.to_f / n
+        end
+        std_devs = data.first.size.times.map do |i|
+          m = means[i]
+          Math.sqrt(data.map { |row| (row[i] - m)**2 }.sum.to_f / n)
+        end
+        { means: means, std_devs: std_devs }
+      else
+        raise ArgumentError, "Unknown method for calculating max values: #{method}"
+      end
+    end
+  end
+end
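A minimal usage sketch of the Normalization helpers added above; the sample data is illustrative and it assumes `require 'grydra'` loads this module:

data = [[1.0, 200.0], [2.0, 400.0], [3.0, 600.0]]

# Z-score: returns the normalized rows plus the per-column means and standard deviations
normalized, means, std_devs = GRYDRA::Normalization.zscore_normalize(data)
restored = GRYDRA::Normalization.zscore_denormalize(normalized, means, std_devs)

# Min-max scaling into [0, 1]
scaled = GRYDRA::Normalization.min_max_normalize(data, 0, 1)

# Reusable column statistics, fed back into normalize_multiple
stats  = GRYDRA::Normalization.calculate_max_values(data, :zscore)
scored = GRYDRA::Normalization.normalize_multiple(data, stats, :zscore)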
data/lib/grydra/optimizers.rb

@@ -0,0 +1,162 @@
+module GRYDRA
+  module Optimizers
+    # Base Optimizer class
+    class Base
+      def update(parameter_id, gradient)
+        raise NotImplementedError, 'Subclasses must implement update method'
+      end
+
+      def reset
+        raise NotImplementedError, 'Subclasses must implement reset method'
+      end
+    end
+
+    # Stochastic Gradient Descent with Momentum
+    class SGD < Base
+      attr_reader :learning_rate, :momentum
+
+      def initialize(learning_rate: 0.01, momentum: 0.9, nesterov: false)
+        @learning_rate = learning_rate
+        @momentum = momentum
+        @nesterov = nesterov
+        @velocity = {}
+      end
+
+      def update(parameter_id, gradient)
+        @velocity[parameter_id] ||= 0
+
+        if @nesterov
+          # Nesterov momentum
+          @velocity[parameter_id] = @momentum * @velocity[parameter_id] - @learning_rate * gradient
+          @momentum * @velocity[parameter_id] - @learning_rate * gradient
+        else
+          # Classical momentum
+          @velocity[parameter_id] = @momentum * @velocity[parameter_id] - @learning_rate * gradient
+          @velocity[parameter_id]
+        end
+      end
+
+      def reset
+        @velocity.clear
+      end
+    end
+
+    # Adam Optimizer (Adaptive Moment Estimation)
+    class Adam < Base
+      attr_reader :alpha, :beta1, :beta2, :epsilon
+
+      def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, epsilon: 1e-8)
+        @alpha = alpha
+        @beta1 = beta1
+        @beta2 = beta2
+        @epsilon = epsilon
+        @m = {}
+        @v = {}
+        @t = 0
+      end
+
+      def update(parameter_id, gradient)
+        @t += 1
+        @m[parameter_id] ||= 0
+        @v[parameter_id] ||= 0
+
+        @m[parameter_id] = @beta1 * @m[parameter_id] + (1 - @beta1) * gradient
+        @v[parameter_id] = @beta2 * @v[parameter_id] + (1 - @beta2) * gradient**2
+
+        m_hat = @m[parameter_id] / (1 - @beta1**@t)
+        v_hat = @v[parameter_id] / (1 - @beta2**@t)
+
+        @alpha * m_hat / (Math.sqrt(v_hat) + @epsilon)
+      end
+
+      def reset
+        @m.clear
+        @v.clear
+        @t = 0
+      end
+    end
+
+    # RMSprop Optimizer
+    class RMSprop < Base
+      attr_reader :learning_rate, :decay_rate, :epsilon
+
+      def initialize(learning_rate: 0.001, decay_rate: 0.9, epsilon: 1e-8)
+        @learning_rate = learning_rate
+        @decay_rate = decay_rate
+        @epsilon = epsilon
+        @cache = {}
+      end
+
+      def update(parameter_id, gradient)
+        @cache[parameter_id] ||= 0
+        @cache[parameter_id] = @decay_rate * @cache[parameter_id] + (1 - @decay_rate) * gradient**2
+        @learning_rate * gradient / (Math.sqrt(@cache[parameter_id]) + @epsilon)
+      end
+
+      def reset
+        @cache.clear
+      end
+    end
+
+    # AdaGrad Optimizer
+    class AdaGrad < Base
+      attr_reader :learning_rate, :epsilon
+
+      def initialize(learning_rate: 0.01, epsilon: 1e-8)
+        @learning_rate = learning_rate
+        @epsilon = epsilon
+        @cache = {}
+      end
+
+      def update(parameter_id, gradient)
+        @cache[parameter_id] ||= 0
+        @cache[parameter_id] += gradient**2
+        @learning_rate * gradient / (Math.sqrt(@cache[parameter_id]) + @epsilon)
+      end
+
+      def reset
+        @cache.clear
+      end
+    end
+
+    # AdamW Optimizer (Adam with decoupled weight decay)
+    class AdamW < Base
+      attr_reader :alpha, :beta1, :beta2, :epsilon, :weight_decay
+
+      def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, epsilon: 1e-8, weight_decay: 0.01)
+        @alpha = alpha
+        @beta1 = beta1
+        @beta2 = beta2
+        @epsilon = epsilon
+        @weight_decay = weight_decay
+        @m = {}
+        @v = {}
+        @t = 0
+      end
+
+      def update(parameter_id, gradient, parameter_value: 0)
+        @t += 1
+        @m[parameter_id] ||= 0
+        @v[parameter_id] ||= 0
+
+        @m[parameter_id] = @beta1 * @m[parameter_id] + (1 - @beta1) * gradient
+        @v[parameter_id] = @beta2 * @v[parameter_id] + (1 - @beta2) * gradient**2
+
+        m_hat = @m[parameter_id] / (1 - @beta1**@t)
+        v_hat = @v[parameter_id] / (1 - @beta2**@t)
+
+        # AdamW: decoupled weight decay
+        @alpha * (m_hat / (Math.sqrt(v_hat) + @epsilon) + @weight_decay * parameter_value)
+      end
+
+      def reset
+        @m.clear
+        @v.clear
+        @t = 0
+      end
+    end
+
+    # Alias for backward compatibility
+    AdamOptimizer = Adam
+  end
+end
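A minimal usage sketch of the optimizers added above, minimising f(w) = w**2 for a single scalar parameter. The sign convention (subtracting Adam's step, adding SGD's velocity) is an assumption about how the returned values are meant to be applied; the sample values are illustrative:

weight = 0.5
adam = GRYDRA::Optimizers::Adam.new(alpha: 0.05)
100.times do
  gradient = 2.0 * weight               # d/dw of w**2
  weight -= adam.update(:w, gradient)   # Adam returns a positive step; subtracting it descends (assumed convention)
end
adam.reset

weight = 0.5
sgd = GRYDRA::Optimizers::SGD.new(learning_rate: 0.1, momentum: 0.9)
100.times do
  weight += sgd.update(:w, 2.0 * weight)  # SGD returns a signed velocity that is added (assumed convention)
end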
data/lib/grydra/preprocessing/data.rb

@@ -0,0 +1,48 @@
+module GRYDRA
+  module Preprocessing
+    module Data
+      # Split data into training and test sets
+      def self.split_data(data_x, data_y, training_ratio = 0.8, seed = nil)
+        srand(seed) if seed
+        indices = (0...data_x.size).to_a.shuffle
+        cut = (data_x.size * training_ratio).to_i
+
+        {
+          train_x: indices[0...cut].map { |i| data_x[i] },
+          train_y: indices[0...cut].map { |i| data_y[i] },
+          test_x: indices[cut..-1].map { |i| data_x[i] },
+          test_y: indices[cut..-1].map { |i| data_y[i] }
+        }
+      end
+
+      # Generate synthetic data for testing
+      def self.generate_synthetic_data(n_samples, n_features, noise = 0.1, seed = nil)
+        srand(seed) if seed
+        data = Array.new(n_samples) do
+          Array.new(n_features) { rand * 2 - 1 + (rand * noise - noise / 2) }
+        end
+
+        # Generate labels based on a simple function
+        labels = data.map do |sample|
+          value = sample.each_with_index.sum { |x, i| x * (i + 1) * 0.1 }
+          [value + (rand * noise - noise / 2)]
+        end
+
+        { data: data, labels: labels }
+      end
+
+      # Convert hashes to vectors
+      def self.convert_hashes_to_vectors(array_hashes, keys)
+        array_hashes.map do |hash|
+          keys.map do |k|
+            if hash[k]
+              hash[k] == true ? 1.0 : (hash[k] == false ? 0.0 : hash[k].to_f)
+            else
+              0.0
+            end
+          end
+        end
+      end
+    end
+  end
+end
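A minimal usage sketch of the data helpers added above; the sizes, seed, and keys are illustrative:

synth = GRYDRA::Preprocessing::Data.generate_synthetic_data(100, 3, 0.05, 42)
split = GRYDRA::Preprocessing::Data.split_data(synth[:data], synth[:labels], 0.8, 42)
split[:train_x].size  # => 80
split[:test_x].size   # => 20

rows = [{ age: 30, active: true }, { age: 25 }]
GRYDRA::Preprocessing::Data.convert_hashes_to_vectors(rows, %i[age active])
# => [[30.0, 1.0], [25.0, 0.0]]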
data/lib/grydra/preprocessing/pca.rb

@@ -0,0 +1,132 @@
+module GRYDRA
+  module Preprocessing
+    module PCA
+      # Principal Component Analysis with Power Iteration
+      def self.pca(data, components: 2, max_iterations: 1000, tolerance: 1e-6)
+        n = data.size
+        m = data.first.size
+
+        # Center data
+        means = (0...m).map { |i| data.map { |row| row[i] }.sum.to_f / n }
+        centered_data = data.map { |row| row.zip(means).map { |v, mean| v - mean } }
+
+        # Calculate covariance matrix
+        covariance = calculate_covariance(centered_data, n, m)
+
+        # Find principal components using power iteration
+        principal_components = []
+        eigenvalues = []
+
+        components.times do |comp_idx|
+          eigenvector, eigenvalue = power_iteration(
+            covariance,
+            max_iterations: max_iterations,
+            tolerance: tolerance,
+            deflate: principal_components
+          )
+
+          break unless eigenvector
+
+          principal_components << eigenvector
+          eigenvalues << eigenvalue
+        end
+
+        # Project data onto principal components
+        transformed_data = centered_data.map do |row|
+          principal_components.map do |pc|
+            row.zip(pc).map { |a, b| a * b }.sum
+          end
+        end
+
+        {
+          means: means,
+          principal_components: principal_components,
+          eigenvalues: eigenvalues,
+          explained_variance: calculate_explained_variance(eigenvalues),
+          transformed_data: transformed_data,
+          covariance: covariance
+        }
+      end
+
+      def self.transform(data, pca_result)
+        # Transform new data using existing PCA
+        centered = data.map do |row|
+          row.zip(pca_result[:means]).map { |v, mean| v - mean }
+        end
+
+        centered.map do |row|
+          pca_result[:principal_components].map do |pc|
+            row.zip(pc).map { |a, b| a * b }.sum
+          end
+        end
+      end
+
+      private
+
+      def self.calculate_covariance(centered_data, n, m)
+        covariance = Array.new(m) { Array.new(m, 0.0) }
+        (0...m).each do |i|
+          (0...m).each do |j|
+            covariance[i][j] = centered_data.map { |row| row[i] * row[j] }.sum / (n - 1).to_f
+          end
+        end
+        covariance
+      end
+
+      def self.power_iteration(matrix, max_iterations:, tolerance:, deflate: [])
+        n = matrix.size
+
+        # Initialize random vector
+        vector = Array.new(n) { rand }
+        vector = normalize_vector(vector)
+
+        # Deflate for previously found components
+        deflate.each do |pc|
+          projection = vector.zip(pc).map { |a, b| a * b }.sum
+          vector = vector.zip(pc).map { |v, p| v - projection * p }
+        end
+        vector = normalize_vector(vector)
+
+        max_iterations.times do
+          # Multiply matrix by vector
+          new_vector = matrix_vector_multiply(matrix, vector)
+
+          # Normalize
+          new_vector = normalize_vector(new_vector)
+
+          # Check convergence
+          diff = vector.zip(new_vector).map { |a, b| (a - b).abs }.max
+
+          vector = new_vector
+
+          break if diff < tolerance
+        end
+
+        # Calculate eigenvalue (Rayleigh quotient)
+        numerator = matrix_vector_multiply(matrix, vector).zip(vector).map { |a, b| a * b }.sum
+        denominator = vector.map { |v| v**2 }.sum
+        eigenvalue = numerator / denominator
+
+        [vector, eigenvalue]
+      end
+
+      def self.matrix_vector_multiply(matrix, vector)
+        matrix.map do |row|
+          row.zip(vector).map { |a, b| a * b }.sum
+        end
+      end
+
+      def self.normalize_vector(vector)
+        magnitude = Math.sqrt(vector.map { |v| v**2 }.sum)
+        return vector if magnitude.zero?
+        vector.map { |v| v / magnitude }
+      end
+
+      def self.calculate_explained_variance(eigenvalues)
+        total = eigenvalues.sum
+        return [] if total.zero?
+        eigenvalues.map { |ev| ev / total }
+      end
+    end
+  end
+end
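A minimal usage sketch of the PCA helpers added above; the two-dimensional sample is illustrative:

data = [[2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0], [2.3, 2.7]]

result = GRYDRA::Preprocessing::PCA.pca(data, components: 1)
result[:transformed_data]    # each row projected onto the first principal component
result[:explained_variance]  # fraction of total variance captured per component

# Project new observations with the fitted components and means
GRYDRA::Preprocessing::PCA.transform([[1.5, 1.6]], result)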
data/lib/grydra/preprocessing/text.rb

@@ -0,0 +1,62 @@
+module GRYDRA
+  module Preprocessing
+    module Text
+      # Create vocabulary from texts
+      def self.create_vocabulary(texts)
+        texts.map(&:split).flatten.map(&:downcase).uniq
+      end
+
+      # Create advanced vocabulary with frequency filtering
+      def self.create_advanced_vocabulary(texts, min_frequency = 1, max_words = nil)
+        frequencies = Hash.new(0)
+        texts.each do |text|
+          text.downcase.split.each { |word| frequencies[word] += 1 }
+        end
+
+        vocabulary = frequencies.select { |_, freq| freq >= min_frequency }.keys
+
+        if max_words && vocabulary.size > max_words
+          vocabulary = frequencies.sort_by { |_, freq| -freq }.first(max_words).map(&:first)
+        end
+
+        vocabulary.sort
+      end
+
+      # Vectorize text (binary)
+      def self.vectorize_text(text, vocabulary)
+        vector = Array.new(vocabulary.size, 0)
+        words = text.downcase.split
+        words.each do |word|
+          index = vocabulary.index(word)
+          vector[index] = 1 if index
+        end
+        vector
+      end
+
+      # Vectorize text using TF-IDF
+      def self.vectorize_text_tfidf(text, vocabulary, corpus_frequencies)
+        vector = Array.new(vocabulary.size, 0.0)
+        words = text.downcase.split
+        doc_frequencies = Hash.new(0)
+
+        words.each { |word| doc_frequencies[word] += 1 }
+
+        vocabulary.each_with_index do |word, idx|
+          next unless doc_frequencies[word] > 0
+
+          tf = doc_frequencies[word].to_f / words.size
+          idf = Math.log(corpus_frequencies.size.to_f / (corpus_frequencies[word] || 1))
+          vector[idx] = tf * idf
+        end
+
+        vector
+      end
+
+      # Normalize with vocabulary
+      def self.normalize_with_vocabulary(data, vocabulary)
+        max_value = vocabulary.size
+        data.map { |vector| vector.map { |v| v.to_f / max_value } }
+      end
+    end
+  end
+end
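A minimal usage sketch of the text helpers added above. The shape of corpus_frequencies (word mapped to the number of documents containing it) is an assumption inferred from how vectorize_text_tfidf reads it; the sample corpus is illustrative:

texts = ['the cat sat', 'the dog ran', 'a cat ran']
vocab = GRYDRA::Preprocessing::Text.create_advanced_vocabulary(texts, 1)

GRYDRA::Preprocessing::Text.vectorize_text('the cat', vocab)  # binary bag-of-words vector

# Per-word document counts for the TF-IDF variant (assumed shape)
corpus_frequencies = Hash.new(0)
texts.each { |t| t.downcase.split.uniq.each { |w| corpus_frequencies[w] += 1 } }
GRYDRA::Preprocessing::Text.vectorize_text_tfidf('the cat ran', vocab, corpus_frequencies)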
data/lib/grydra/regularization.rb

@@ -0,0 +1,19 @@
+module GRYDRA
+  module Regularization
+    # Apply dropout to outputs during training
+    def self.apply_dropout(outputs, dropout_rate = 0.5, training = true)
+      return outputs unless training
+      outputs.map { |s| rand < dropout_rate ? 0 : s / (1 - dropout_rate) }
+    end
+
+    # L1 regularization (Lasso)
+    def self.l1_regularization(weights, lambda_l1)
+      lambda_l1 * weights.sum { |p| p.abs }
+    end
+
+    # L2 regularization (Ridge)
+    def self.l2_regularization(weights, lambda_l2)
+      lambda_l2 * weights.sum { |p| p**2 }
+    end
+  end
+end
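A minimal usage sketch of the regularization helpers added above; the weights and rates are illustrative:

weights = [0.5, -1.2, 0.03]

GRYDRA::Regularization.l1_regularization(weights, 0.01)  # 0.01 * (|0.5| + |-1.2| + |0.03|)
GRYDRA::Regularization.l2_regularization(weights, 0.01)  # 0.01 * (0.5**2 + 1.2**2 + 0.03**2)
GRYDRA::Regularization.apply_dropout([0.2, 0.7, 0.4], 0.5, true)  # zeroes roughly half the outputs, rescales the rest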
data/lib/grydra/training/cross_validation.rb

@@ -0,0 +1,35 @@
+module GRYDRA
+  module Training
+    module CrossValidation
+      # K-fold cross-validation
+      def self.cross_validation(data_input, data_output, k_folds = 5)
+        indices = (0...data_input.size).to_a.shuffle
+        fold_size = data_input.size / k_folds
+        errors = []
+
+        k_folds.times do |i|
+          start = i * fold_size
+          finish = [start + fold_size, data_input.size].min
+          indices_test = indices[start...finish]
+          indices_train = indices - indices_test
+
+          # Split data
+          train_x = indices_train.map { |idx| data_input[idx] }
+          train_y = indices_train.map { |idx| data_output[idx] }
+          test_x = indices_test.map { |idx| data_input[idx] }
+          test_y = indices_test.map { |idx| data_output[idx] }
+
+          # Train and evaluate
+          error = yield(train_x, train_y, test_x, test_y)
+          errors << error
+        end
+
+        {
+          errors: errors,
+          average: errors.sum / errors.size.to_f,
+          deviation: Math.sqrt(errors.map { |e| (e - errors.sum / errors.size.to_f)**2 }.sum / errors.size.to_f)
+        }
+      end
+    end
+  end
+end
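A minimal usage sketch of the k-fold helper added above. The block is where a model would normally be trained and scored; here it returns the mean absolute error of a trivial mean predictor, purely for illustration:

xs = (1..50).map { |i| [i.to_f] }
ys = xs.map { |(x)| [2.0 * x] }

report = GRYDRA::Training::CrossValidation.cross_validation(xs, ys, 5) do |train_x, train_y, test_x, test_y|
  mean = train_y.flatten.sum / train_y.size.to_f
  test_y.flatten.sum { |y| (y - mean).abs } / test_y.size.to_f
end

report[:average]    # mean fold error
report[:deviation]  # standard deviation across folds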
data/lib/grydra/training/hyperparameter_search.rb

@@ -0,0 +1,46 @@
+module GRYDRA
+  module Training
+    module HyperparameterSearch
+      # Grid search for hyperparameters
+      def self.hyperparameter_search(data_x, data_y, param_grid, verbose: true)
+        best_params = nil
+        best_score = Float::INFINITY
+        results = []
+
+        puts 'Starting hyperparameter search...' if verbose
+
+        param_grid.each_with_index do |params, idx|
+          puts "Testing configuration #{idx + 1}/#{param_grid.size}: #{params}" if verbose
+
+          begin
+            score = yield(params, data_x, data_y)
+            results << { parameters: params, score: score }
+
+            if score < best_score
+              best_score = score
+              best_params = params
+              puts " New best configuration! Score: #{score.round(6)}" if verbose
+            else
+              puts " Score: #{score.round(6)}" if verbose
+            end
+          rescue StandardError => e
+            puts " Error with this configuration: #{e.message}" if verbose
+            results << { parameters: params, score: Float::INFINITY, error: e.message }
+          end
+        end
+
+        if verbose
+          puts "\nBest parameters found:"
+          puts " Configuration: #{best_params}"
+          puts " Score: #{best_score.round(6)}"
+        end
+
+        {
+          parameters: best_params,
+          score: best_score,
+          all_results: results.sort_by { |r| r[:score] }
+        }
+      end
+    end
+  end
+end
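A minimal usage sketch of the grid-search helper added above. The scoring block stands in for a real train/evaluate step and simply prefers the smaller learning rate; the data and grid values are illustrative:

data_x = [[0.0], [1.0], [2.0]]
data_y = [[0.0], [2.0], [4.0]]
grid = [
  { learning_rate: 0.1, epochs: 100 },
  { learning_rate: 0.01, epochs: 200 }
]

best = GRYDRA::Training::HyperparameterSearch.hyperparameter_search(data_x, data_y, grid, verbose: false) do |params, x, y|
  (params[:learning_rate] - 0.01).abs  # lower is better; a real block would train on x/y and return validation error
end

best[:parameters]   # => { learning_rate: 0.01, epochs: 200 }
best[:all_results]  # every configuration, sorted by score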