grydra 1.0.0 → 2.0.1

This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
@@ -0,0 +1,78 @@
+ module GRYDRA
+   module Normalization
+     # Z-score normalization
+     def self.zscore_normalize(data)
+       n = data.size
+       means = data.first.size.times.map { |i| data.map { |row| row[i] }.sum.to_f / n }
+       std_devs = data.first.size.times.map do |i|
+         m = means[i]
+         Math.sqrt(data.map { |row| (row[i] - m)**2 }.sum.to_f / n)
+       end
+       normalized = data.map do |row|
+         row.each_with_index.map { |value, i| std_devs[i] != 0 ? (value - means[i]) / std_devs[i] : 0 }
+       end
+       [normalized, means, std_devs]
+     end
+
+     def self.zscore_denormalize(normalized, means, std_devs)
+       normalized.map do |row|
+         row.each_with_index.map { |value, i| value * std_devs[i] + means[i] }
+       end
+     end
+
+     # Min-Max normalization
+     def self.min_max_normalize(data, min_val = 0, max_val = 1)
+       data_min = data.flatten.min
+       data_max = data.flatten.max
+       range = data_max - data_min
+       return data if range == 0
+
+       data.map do |row|
+         row.map { |v| min_val + (v - data_min) * (max_val - min_val) / range }
+       end
+     end
+
+     # Generic normalization with multiple methods
+     def self.normalize_multiple(data, max_values, method = :max)
+       case method
+       when :max
+         data.map do |row|
+           # Guard against a zero maximum, mirroring the zero-std-dev guard above
+           row.each_with_index.map { |value, idx| max_values[idx] != 0 ? value.to_f / max_values[idx] : 0 }
+         end
+       when :zscore
+         means = max_values[:means]
+         std_devs = max_values[:std_devs]
+         data.map do |row|
+           row.each_with_index.map do |value, idx|
+             std_devs[idx] != 0 ? (value.to_f - means[idx]) / std_devs[idx] : 0
+           end
+         end
+       else
+         raise ArgumentError, "Unknown normalization method: #{method}"
+       end
+     end
+
+     def self.calculate_max_values(data, method = :max)
+       if method == :max
+         max_values = {}
+         data.first.size.times do |i|
+           max_values[i] = data.map { |row| row[i] }.max.to_f
+         end
+         max_values
+       elsif method == :zscore
+         n = data.size
+         means = data.first.size.times.map do |i|
+           data.map { |row| row[i] }.sum.to_f / n
+         end
+         std_devs = data.first.size.times.map do |i|
+           m = means[i]
+           Math.sqrt(data.map { |row| (row[i] - m)**2 }.sum.to_f / n)
+         end
+         { means: means, std_devs: std_devs }
+       else
+         raise ArgumentError, "Unknown method for calculating max values: #{method}"
+       end
+     end
+   end
+ end
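
A minimal round-trip sketch of the normalization API above (the data matrix is hypothetical):

    data = [[1.0, 200.0], [2.0, 300.0], [3.0, 400.0]]
    normalized, means, std_devs = GRYDRA::Normalization.zscore_normalize(data)
    restored = GRYDRA::Normalization.zscore_denormalize(normalized, means, std_devs)
    # restored ~= data, up to floating-point error
    scaled = GRYDRA::Normalization.min_max_normalize(data)  # values rescaled into [0, 1]
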
@@ -0,0 +1,166 @@
+ module GRYDRA
+   module Optimizers
+     # Base Optimizer class
+     class Base
+       def update(parameter_id, gradient)
+         raise NotImplementedError, 'Subclasses must implement update method'
+       end
+
+       def reset
+         raise NotImplementedError, 'Subclasses must implement reset method'
+       end
+     end
+
+     # Stochastic Gradient Descent with Momentum
+     class SGD < Base
+       attr_reader :learning_rate, :momentum
+
+       def initialize(learning_rate: 0.01, momentum: 0.9, nesterov: false)
+         @learning_rate = learning_rate
+         @momentum = momentum
+         @nesterov = nesterov
+         @velocity = {}
+       end
+
+       # Returns the increment to ADD to the parameter
+       def update(parameter_id, gradient)
+         @velocity[parameter_id] ||= 0
+
+         if @nesterov
+           # Nesterov momentum: return the look-ahead step
+           @velocity[parameter_id] = @momentum * @velocity[parameter_id] - @learning_rate * gradient
+           @momentum * @velocity[parameter_id] - @learning_rate * gradient
+         else
+           # Classical momentum
+           @velocity[parameter_id] = @momentum * @velocity[parameter_id] - @learning_rate * gradient
+           @velocity[parameter_id]
+         end
+       end
+
+       def reset
+         @velocity.clear
+       end
+     end
+
+     # Adam Optimizer (Adaptive Moment Estimation)
+     class Adam < Base
+       attr_reader :alpha, :beta1, :beta2, :epsilon
+
+       def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, epsilon: 1e-8)
+         @alpha = alpha
+         @beta1 = beta1
+         @beta2 = beta2
+         @epsilon = epsilon
+         @m = {}
+         @v = {}
+         @t = 0
+       end
+
+       # Returns the step magnitude to SUBTRACT from the parameter
+       # (note: the opposite sign convention from SGD#update above)
+       def update(parameter_id, gradient)
+         @t += 1
+         @m[parameter_id] ||= 0
+         @v[parameter_id] ||= 0
+
+         @m[parameter_id] = @beta1 * @m[parameter_id] + (1 - @beta1) * gradient
+         @v[parameter_id] = @beta2 * @v[parameter_id] + (1 - @beta2) * gradient**2
+
+         # Bias-corrected first and second moment estimates
+         m_hat = @m[parameter_id] / (1 - @beta1**@t)
+         v_hat = @v[parameter_id] / (1 - @beta2**@t)
+
+         @alpha * m_hat / (Math.sqrt(v_hat) + @epsilon)
+       end
+
+       def reset
+         @m.clear
+         @v.clear
+         @t = 0
+       end
+     end
+
+     # RMSprop Optimizer
+     class RMSprop < Base
+       attr_reader :learning_rate, :decay_rate, :epsilon
+
+       def initialize(learning_rate: 0.001, decay_rate: 0.9, epsilon: 1e-8)
+         @learning_rate = learning_rate
+         @decay_rate = decay_rate
+         @epsilon = epsilon
+         @cache = {}
+       end
+
+       def update(parameter_id, gradient)
+         @cache[parameter_id] ||= 0
+         @cache[parameter_id] = @decay_rate * @cache[parameter_id] + (1 - @decay_rate) * gradient**2
+         @learning_rate * gradient / (Math.sqrt(@cache[parameter_id]) + @epsilon)
+       end
+
+       def reset
+         @cache.clear
+       end
+     end
+
+     # AdaGrad Optimizer
+     class AdaGrad < Base
+       attr_reader :learning_rate, :epsilon
+
+       def initialize(learning_rate: 0.01, epsilon: 1e-8)
+         @learning_rate = learning_rate
+         @epsilon = epsilon
+         @cache = {}
+       end
+
+       def update(parameter_id, gradient)
+         @cache[parameter_id] ||= 0
+         @cache[parameter_id] += gradient**2
+         @learning_rate * gradient / (Math.sqrt(@cache[parameter_id]) + @epsilon)
+       end
+
+       def reset
+         @cache.clear
+       end
+     end
+
+     # AdamW Optimizer (Adam with decoupled weight decay)
+     class AdamW < Base
+       attr_reader :alpha, :beta1, :beta2, :epsilon, :weight_decay
+
+       def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, epsilon: 1e-8, weight_decay: 0.01)
+         @alpha = alpha
+         @beta1 = beta1
+         @beta2 = beta2
+         @epsilon = epsilon
+         @weight_decay = weight_decay
+         @m = {}
+         @v = {}
+         @t = 0
+       end
+
+       def update(parameter_id, gradient, parameter_value: 0)
+         @t += 1
+         @m[parameter_id] ||= 0
+         @v[parameter_id] ||= 0
+
+         @m[parameter_id] = @beta1 * @m[parameter_id] + (1 - @beta1) * gradient
+         @v[parameter_id] = @beta2 * @v[parameter_id] + (1 - @beta2) * gradient**2
+
+         m_hat = @m[parameter_id] / (1 - @beta1**@t)
+         v_hat = @v[parameter_id] / (1 - @beta2**@t)
+
+         # AdamW: decoupled weight decay
+         @alpha * (m_hat / (Math.sqrt(v_hat) + @epsilon) + @weight_decay * parameter_value)
+       end
+
+       def reset
+         @m.clear
+         @v.clear
+         @t = 0
+       end
+     end
+
+     # Alias for backward compatibility
+     AdamOptimizer = Adam
+   end
+ end
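
A usage sketch for the optimizers above, minimizing the hypothetical objective f(w) = (w - 3)^2, whose gradient is 2(w - 3):

    opt = GRYDRA::Optimizers::Adam.new(alpha: 0.1)
    w = 0.0
    500.times do
      gradient = 2 * (w - 3)
      w -= opt.update(:w, gradient)  # Adam returns the step to subtract
    end
    # w converges toward 3.0; opt.reset clears the moment estimates for reuse
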
@@ -0,0 +1,49 @@
+ module GRYDRA
+   module Preprocessing
+     module Data
+       # Split data into training and test sets
+       def self.split_data(data_x, data_y, training_ratio = 0.8, seed = nil)
+         srand(seed) if seed
+         indices = (0...data_x.size).to_a.shuffle
+         cut = (data_x.size * training_ratio).to_i
+
+         {
+           train_x: indices[0...cut].map { |i| data_x[i] },
+           train_y: indices[0...cut].map { |i| data_y[i] },
+           test_x: indices[cut..-1].map { |i| data_x[i] },
+           test_y: indices[cut..-1].map { |i| data_y[i] }
+         }
+       end
+
+       # Generate synthetic data for testing
+       def self.generate_synthetic_data(n_samples, n_features, noise = 0.1, seed = nil)
+         srand(seed) if seed
+         data = Array.new(n_samples) do
+           Array.new(n_features) { rand * 2 - 1 + (rand * noise - noise / 2) }
+         end
+
+         # Generate labels from a simple linear function of the features
+         labels = data.map do |sample|
+           value = sample.each_with_index.sum { |x, i| x * (i + 1) * 0.1 }
+           [value + (rand * noise - noise / 2)]
+         end
+
+         { data: data, labels: labels }
+       end
+
+       # Convert hashes to numeric vectors: true => 1.0, false/nil => 0.0,
+       # anything else => to_f
+       def self.convert_hashes_to_vectors(array_hashes, keys)
+         array_hashes.map do |hash|
+           keys.map do |k|
+             case hash[k]
+             when true then 1.0
+             when false, nil then 0.0
+             else hash[k].to_f
+             end
+           end
+         end
+       end
+     end
+   end
+ end
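
A sketch combining the two helpers above (the seed and sizes are arbitrary):

    synth = GRYDRA::Preprocessing::Data.generate_synthetic_data(100, 3, 0.05, 42)
    sets  = GRYDRA::Preprocessing::Data.split_data(synth[:data], synth[:labels], 0.8, 42)
    sets[:train_x].size  # => 80
    sets[:test_x].size   # => 20
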
@@ -0,0 +1,141 @@
+ module GRYDRA
+   module Preprocessing
+     module PCA
+       # Principal Component Analysis with Power Iteration
+       def self.pca(data, components: 2, max_iterations: 1000, tolerance: 1e-6)
+         n = data.size
+         m = data.first.size
+
+         # Center data
+         means = (0...m).map { |i| data.map { |row| row[i] }.sum.to_f / n }
+         centered_data = data.map { |row| row.zip(means).map { |v, mean| v - mean } }
+
+         # Calculate covariance matrix
+         covariance = calculate_covariance(centered_data, n, m)
+
+         # Find principal components using power iteration
+         principal_components = []
+         eigenvalues = []
+
+         components.times do
+           eigenvector, eigenvalue = power_iteration(
+             covariance,
+             max_iterations: max_iterations,
+             tolerance: tolerance,
+             deflate: principal_components
+           )
+
+           break unless eigenvector
+
+           principal_components << eigenvector
+           eigenvalues << eigenvalue
+         end
+
+         # Project data onto principal components
+         transformed_data = centered_data.map do |row|
+           principal_components.map do |pc|
+             row.zip(pc).map { |a, b| a * b }.sum
+           end
+         end
+
+         {
+           means: means,
+           principal_components: principal_components,
+           eigenvalues: eigenvalues,
+           explained_variance: calculate_explained_variance(eigenvalues),
+           transformed_data: transformed_data,
+           covariance: covariance
+         }
+       end
+
+       # Transform new data using an existing PCA result
+       def self.transform(data, pca_result)
+         centered = data.map do |row|
+           row.zip(pca_result[:means]).map { |v, mean| v - mean }
+         end
+
+         centered.map do |row|
+           pca_result[:principal_components].map do |pc|
+             row.zip(pc).map { |a, b| a * b }.sum
+           end
+         end
+       end
+
+       def self.calculate_covariance(centered_data, n, m)
+         covariance = Array.new(m) { Array.new(m, 0.0) }
+         (0...m).each do |i|
+           (0...m).each do |j|
+             covariance[i][j] = centered_data.map { |row| row[i] * row[j] }.sum / (n - 1).to_f
+           end
+         end
+         covariance
+       end
+
+       def self.power_iteration(matrix, max_iterations:, tolerance:, deflate: [])
+         n = matrix.size
+
+         # Initialize a random vector, orthogonal to any components
+         # already found
+         vector = normalize_vector(Array.new(n) { rand })
+         vector = deflate_vector(vector, deflate)
+
+         max_iterations.times do
+           # Multiply matrix by vector
+           new_vector = matrix_vector_multiply(matrix, vector)
+
+           # Re-orthogonalize on every iteration so floating-point drift
+           # cannot reintroduce earlier components, then normalize
+           new_vector = deflate_vector(new_vector, deflate)
+           new_vector = normalize_vector(new_vector)
+
+           # Check convergence
+           diff = vector.zip(new_vector).map { |a, b| (a - b).abs }.max
+
+           vector = new_vector
+
+           break if diff < tolerance
+         end
+
+         # Calculate eigenvalue (Rayleigh quotient)
+         numerator = matrix_vector_multiply(matrix, vector).zip(vector).map { |a, b| a * b }.sum
+         denominator = vector.map { |v| v**2 }.sum
+         eigenvalue = numerator / denominator
+
+         [vector, eigenvalue]
+       end
+
+       # Subtract the projections onto previously found components
+       def self.deflate_vector(vector, components)
+         components.each do |pc|
+           projection = vector.zip(pc).map { |a, b| a * b }.sum
+           vector = vector.zip(pc).map { |v, p| v - projection * p }
+         end
+         normalize_vector(vector)
+       end
+
+       def self.matrix_vector_multiply(matrix, vector)
+         matrix.map do |row|
+           row.zip(vector).map { |a, b| a * b }.sum
+         end
+       end
+
+       def self.normalize_vector(vector)
+         magnitude = Math.sqrt(vector.map { |v| v**2 }.sum)
+         return vector if magnitude.zero?
+         vector.map { |v| v / magnitude }
+       end
+
+       def self.calculate_explained_variance(eigenvalues)
+         total = eigenvalues.sum
+         return [] if total.zero?
+         eigenvalues.map { |ev| ev / total }
+       end
+
+       # A bare `private` does not apply to methods defined with
+       # `def self.`, so hide the helpers explicitly
+       private_class_method :calculate_covariance, :power_iteration,
+                            :deflate_vector, :matrix_vector_multiply,
+                            :normalize_vector, :calculate_explained_variance
+     end
+   end
+ end
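
A sketch of fitting and reusing a projection with the PCA module above (the five 2-D points are hypothetical):

    points = [[2.5, 2.4], [0.5, 0.7], [2.2, 2.9], [1.9, 2.2], [3.1, 3.0]]
    result = GRYDRA::Preprocessing::PCA.pca(points, components: 1)
    result[:explained_variance].first  # fraction of variance the first component captures
    GRYDRA::Preprocessing::PCA.transform([[1.5, 1.6]], result)  # project unseen data
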
@@ -0,0 +1,69 @@
+ module GRYDRA
+   module Preprocessing
+     module Text
+       # Create vocabulary from texts
+       def self.create_vocabulary(texts)
+         texts.map(&:split).flatten.map(&:downcase).uniq
+       end
+
+       # Create advanced vocabulary with frequency filtering
+       def self.create_advanced_vocabulary(texts, min_frequency = 1, max_words = nil)
+         frequencies = Hash.new(0)
+         texts.each do |text|
+           text.downcase.split.each { |word| frequencies[word] += 1 }
+         end
+
+         vocabulary = frequencies.select { |_, freq| freq >= min_frequency }.keys
+
+         if max_words && vocabulary.size > max_words
+           # Keep the most frequent words among those that passed
+           # the min_frequency filter
+           vocabulary = vocabulary.sort_by { |word| -frequencies[word] }.first(max_words)
+         end
+
+         vocabulary.sort
+       end
+
+       # Vectorize text (binary bag of words)
+       def self.vectorize_text(text, vocabulary)
+         vector = Array.new(vocabulary.size, 0)
+         # Hash lookup avoids an O(V) vocabulary scan per word
+         index_of = vocabulary.each_with_index.to_h
+         text.downcase.split.each do |word|
+           index = index_of[word]
+           vector[index] = 1 if index
+         end
+         vector
+       end
+
+       # Vectorize text using TF-IDF; corpus_frequencies maps each word
+       # to its document frequency in the corpus
+       def self.vectorize_text_tfidf(text, vocabulary, corpus_frequencies)
+         vector = Array.new(vocabulary.size, 0.0)
+         words = text.downcase.split
+         doc_frequencies = Hash.new(0)
+
+         words.each { |word| doc_frequencies[word] += 1 }
+
+         vocabulary.each_with_index do |word, idx|
+           next unless doc_frequencies[word] > 0
+
+           tf = doc_frequencies[word].to_f / words.size
+           # `|| 1` cannot guard a zero count (0 is truthy in Ruby),
+           # so clamp the document frequency explicitly
+           df = [corpus_frequencies[word].to_i, 1].max
+           idf = Math.log(corpus_frequencies.size.to_f / df)
+           vector[idx] = tf * idf
+         end
+
+         vector
+       end
+
+       # Normalize with vocabulary
+       def self.normalize_with_vocabulary(data, vocabulary)
+         max_value = vocabulary.size
+         data.map { |vector| vector.map { |v| v.to_f / max_value } }
+       end
+     end
+   end
+ end
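
A vocabulary-and-vectorization sketch using the helpers above (the three toy documents are hypothetical):

    texts = ['the cat sat', 'the dog sat', 'the cat ran']
    vocab = GRYDRA::Preprocessing::Text.create_advanced_vocabulary(texts, 2)
    # => ["cat", "sat", "the"]  (words appearing at least twice, sorted)
    GRYDRA::Preprocessing::Text.vectorize_text('the cat ran', vocab)
    # => [1, 0, 1]
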
@@ -0,0 +1,20 @@
+ module GRYDRA
+   module Regularization
+     # Apply (inverted) dropout to outputs during training: zeroed units
+     # are compensated by scaling the survivors, so inference needs no rescaling
+     def self.apply_dropout(outputs, dropout_rate = 0.5, training = true)
+       return outputs unless training
+       outputs.map { |s| rand < dropout_rate ? 0 : s / (1 - dropout_rate) }
+     end
+
+     # L1 regularization (Lasso)
+     def self.l1_regularization(weights, lambda_l1)
+       lambda_l1 * weights.sum(&:abs)
+     end
+
+     # L2 regularization (Ridge)
+     def self.l2_regularization(weights, lambda_l2)
+       lambda_l2 * weights.sum { |p| p**2 }
+     end
+   end
+ end
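
A quick sketch of the penalty terms above with hypothetical weights:

    weights = [0.5, -1.2, 0.3]
    GRYDRA::Regularization.l1_regularization(weights, 0.01)  # => 0.01 * 2.0  = 0.02
    GRYDRA::Regularization.l2_regularization(weights, 0.01)  # => 0.01 * 1.78 = 0.0178
    GRYDRA::Regularization.apply_dropout([1.0, 2.0, 3.0], 0.5)
    # each activation is zeroed with probability 0.5; survivors are doubled
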
@@ -0,0 +1,37 @@
+ module GRYDRA
+   module Training
+     module CrossValidation
+       # K-fold cross-validation
+       def self.cross_validation(data_input, data_output, k_folds = 5)
+         indices = (0...data_input.size).to_a.shuffle
+         fold_size = data_input.size / k_folds
+         errors = []
+
+         k_folds.times do |i|
+           start = i * fold_size
+           # The last fold absorbs the remainder left by integer
+           # division, so every sample is tested exactly once
+           finish = i == k_folds - 1 ? data_input.size : start + fold_size
+           indices_test = indices[start...finish]
+           indices_train = indices - indices_test
+
+           # Split data
+           train_x = indices_train.map { |idx| data_input[idx] }
+           train_y = indices_train.map { |idx| data_output[idx] }
+           test_x = indices_test.map { |idx| data_input[idx] }
+           test_y = indices_test.map { |idx| data_output[idx] }
+
+           # Train and evaluate
+           errors << yield(train_x, train_y, test_x, test_y)
+         end
+
+         mean = errors.sum / errors.size.to_f
+         {
+           errors: errors,
+           average: mean,
+           deviation: Math.sqrt(errors.map { |e| (e - mean)**2 }.sum / errors.size.to_f)
+         }
+       end
+     end
+   end
+ end
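
A usage sketch for the block interface above; xs and ys stand for the caller's feature and label arrays, and SomeModel, fit, and mse are hypothetical placeholders for whatever estimator the caller supplies:

    result = GRYDRA::Training::CrossValidation.cross_validation(xs, ys, 5) do |train_x, train_y, test_x, test_y|
      model = SomeModel.new
      model.fit(train_x, train_y)
      model.mse(test_x, test_y)  # the block returns this fold's error
    end
    result[:average]    # mean error across the 5 folds
    result[:deviation]  # standard deviation across folds
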
@@ -0,0 +1,46 @@
+ module GRYDRA
+   module Training
+     module HyperparameterSearch
+       # Grid search for hyperparameters
+       def self.hyperparameter_search(data_x, data_y, param_grid, verbose: true)
+         best_params = nil
+         best_score = Float::INFINITY
+         results = []
+
+         puts 'Starting hyperparameter search...' if verbose
+
+         param_grid.each_with_index do |params, idx|
+           puts "Testing configuration #{idx + 1}/#{param_grid.size}: #{params}" if verbose
+
+           begin
+             score = yield(params, data_x, data_y)
+             results << { parameters: params, score: score }
+
+             if score < best_score
+               best_score = score
+               best_params = params
+               puts "  New best configuration! Score: #{score.round(6)}" if verbose
+             else
+               puts "  Score: #{score.round(6)}" if verbose
+             end
+           rescue StandardError => e
+             puts "  Error with this configuration: #{e.message}" if verbose
+             results << { parameters: params, score: Float::INFINITY, error: e.message }
+           end
+         end
+
+         if verbose
+           puts "\nBest parameters found:"
+           puts "  Configuration: #{best_params}"
+           puts "  Score: #{best_score.round(6)}"
+         end
+
+         {
+           parameters: best_params,
+           score: best_score,
+           all_results: results.sort_by { |r| r[:score] }
+         }
+       end
+     end
+   end
+ end
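
A grid-search sketch wired to the cross-validation block above; the param_grid entries and the train_and_score helper are illustrative placeholders:

    param_grid = [
      { learning_rate: 0.1,  momentum: 0.9 },
      { learning_rate: 0.01, momentum: 0.9 }
    ]
    best = GRYDRA::Training::HyperparameterSearch.hyperparameter_search(xs, ys, param_grid) do |params, data_x, data_y|
      GRYDRA::Training::CrossValidation.cross_validation(data_x, data_y, 5) do |tx, ty, vx, vy|
        train_and_score(params, tx, ty, vx, vy)  # hypothetical helper returning an error
      end[:average]
    end
    best[:parameters]  # the lowest-scoring configuration
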