ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
@@ -1,149 +1,266 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
- require File.dirname(__FILE__) + '/../data/parameterizable'
12
+ require_relative '../data/parameterizable'
11
13
 
12
- module Ai4r
13
-
14
+ module Ai4r
14
15
  module NeuralNetwork
15
-
16
16
  # = Hopfield Net =
17
- #
17
+ #
18
18
  # A Hopfield Network is a recurrent Artificial Neural Network.
19
- # Hopfield nets are able to memorize a set of patterns, and then evaluate
19
+ # Hopfield nets are able to memorize a set of patterns, and then evaluate
20
20
  # an input, returning the most similar stored pattern (although
21
21
  # convergence to one of the stored patterns is not guaranteed).
22
- # Hopfield nets are great to deal with input noise. If a system accepts a
23
- # discrete set of inputs, but inputs are subject to noise, you can use a
22
+ # Hopfield nets are great to deal with input noise. If a system accepts a
23
+ # discrete set of inputs, but inputs are subject to noise, you can use a
24
24
  # Hopfield net to eliminate noise and identify the given input.
25
25
  #
26
26
  # = How to Use =
27
- #
27
+ #
28
28
  # data_set = Ai4r::Data::DataSet.new :data_items => array_of_patterns
29
29
  # net = Ai4r::NeuralNetwork::Hopfield.new.train data_set
30
30
  # net.eval input
31
31
  # => one of the stored patterns in array_of_patterns
32
32
  class Hopfield
33
-
34
33
  include Ai4r::Data::Parameterizable
35
-
34
+
36
35
  attr_reader :weights, :nodes
37
-
38
- parameters_info :eval_iterations => "The network will run for a maximum "+
39
- "of 'eval_iterations' iterations while evaluating an input. 500 by " +
40
- "default.",
41
- :active_node_value => "Default: 1",
42
- :inactive_node_value => "Default: -1",
43
- :threshold => "Default: 0"
44
-
45
- def initialize
36
+
37
+ parameters_info eval_iterations: 'The network will run for a maximum ' \
38
+ "of 'eval_iterations' iterations while evaluating an input. 500 by " \
39
+ 'default.',
40
+ active_node_value: 'Default: 1',
41
+ inactive_node_value: 'Default: -1',
42
+ threshold: 'Default: 0',
43
+ weight_scaling: 'Scale factor applied when computing weights. ' \
44
+ 'Default 1.0 / patterns_count',
45
+ stop_when_stable: 'Stop evaluation when consecutive energy ' \
46
+ 'values do not change. False by default',
47
+ update_strategy: 'Update mode: :async_random (default), ' \
48
+ ':async_sequential, :synchronous'
49
+
50
+ # @param params [Object]
51
+ # @return [Object]
52
+ def initialize(params = {})
46
53
  @eval_iterations = 500
47
54
  @active_node_value = 1
48
55
  @inactive_node_value = -1
49
56
  @threshold = 0
57
+ @weight_scaling = nil
58
+ @stop_when_stable = false
59
+ @update_strategy = :async_random
60
+ # Deterministic random generator to guarantee reproducible behaviour
61
+ @rng = Random.new(3)
62
+ set_parameters(params) if params && !params.empty?
50
63
  end
51
64
 
52
65
  # Prepares the network to memorize the given data set.
53
66
  # Future calls to eval (should) return one of the memorized data items.
54
- # A Hopfield network converges to a local minimum, but convergence to one
67
+ # A Hopfield network converges to a local minimum, but convergence to one
55
68
  # of the "memorized" patterns is not guaranteed.
69
+ # @param data_set [Object]
70
+ # @return [Object]
56
71
  def train(data_set)
57
72
  @data_set = data_set
73
+ validate_training_data
58
74
  initialize_nodes(@data_set)
59
75
  initialize_weights(@data_set)
60
- return self
76
+ self
61
77
  end
62
78
 
63
79
  # You can use run instead of eval to propagate values step by step.
64
- # With this you can verify the progress of the network output with
80
+ # With this you can verify the progress of the network output with
65
81
  # each step.
66
- #
82
+ #
67
83
  # E.g.:
68
84
  # pattern = input
69
85
  # 100.times do
70
86
  # pattern = net.run(pattern)
71
87
  # puts pattern.inspect
72
88
  # end
89
+ # @param input [Object]
90
+ # @return [Object]
73
91
  def run(input)
74
92
  set_input(input)
75
93
  propagate
76
- return @nodes
94
+ @nodes
77
95
  end
78
96
 
79
97
  # Propagates the input until the network returns one of the memorized
80
98
  # patterns, or a maximum of "eval_iterations" times.
81
- def eval(input)
99
+ #
100
+ # If +trace: true+ is passed the method returns a hash with the
101
+ # :states and :energies recorded at every iteration (including the
102
+ # initial state). This can be used to visualize convergence.
103
+ # @param input [Object]
104
+ # @param trace [Object]
105
+ # @return [Object]
106
+ def eval(input, trace: false)
82
107
  set_input(input)
108
+ prev_energy = energy
109
+ if trace
110
+ states = [@nodes.clone]
111
+ energies = [prev_energy]
112
+ end
83
113
  @eval_iterations.times do
84
- propagate
85
- break if @data_set.data_items.include?(@nodes)
114
+ propagate
115
+ new_energy = energy
116
+ if trace
117
+ states << @nodes.clone
118
+ energies << new_energy
119
+ end
120
+ if @data_set.data_items.include?(@nodes)
121
+ return(if trace
122
+ { states: states,
123
+ energies: energies }
124
+ else
125
+ @nodes
126
+ end)
127
+ end
128
+ break if @stop_when_stable && new_energy == prev_energy
129
+
130
+ prev_energy = new_energy
86
131
  end
87
- return @nodes
132
+ trace ? { states: states, energies: energies } : @nodes
88
133
  end
89
-
90
- protected
134
+
135
+ # Calculate network energy using current node states and weights.
136
+ # Energy = -0.5 * Σ w_ij * s_i * s_j
137
+ # @return [Object]
138
+ def energy
139
+ sum = 0.0
140
+ @nodes.each_with_index do |s_i, i|
141
+ i.times do |j|
142
+ sum += read_weight(i, j) * s_i * @nodes[j]
143
+ end
144
+ end
145
+ -sum
146
+ end
147
+
148
+ protected
149
+
91
150
  # Set all nodes state to the given input.
92
151
  # inputs parameter must have the same dimension as nodes
152
+ # @param inputs [Object]
153
+ # @return [Object]
93
154
  def set_input(inputs)
94
155
  raise ArgumentError unless inputs.length == @nodes.length
95
- inputs.each_with_index { |input, i| @nodes[i] = input}
156
+
157
+ inputs.each_with_index { |input, i| @nodes[i] = input }
96
158
  end
97
-
98
- # Select a single node randomly and propagate its state to all other nodes
159
+
160
+ # Propagate network state according to configured update strategy.
161
+ # @return [Object]
99
162
  def propagate
163
+ case @update_strategy
164
+ when :async_sequential
165
+ propagate_async_sequential
166
+ when :synchronous
167
+ propagate_synchronous
168
+ else
169
+ propagate_async_random
170
+ end
171
+ end
172
+
173
+ # Select a single node at random and update its state from the weighted states of the other nodes
174
+ # @return [Object]
175
+ def propagate_async_random
100
176
  sum = 0
101
- i = (rand * @nodes.length).floor
102
- @nodes.each_with_index {|node, j| sum += read_weight(i,j)*node }
103
- @nodes[i] = (sum > @threshold) ? @active_node_value : @inactive_node_value
177
+ i = (@rng.rand * @nodes.length).floor
178
+ @nodes.each_with_index { |node, j| sum += read_weight(i, j) * node }
179
+ @nodes[i] = sum > @threshold ? @active_node_value : @inactive_node_value
180
+ end
181
+
182
+ # Iterate through nodes sequentially, updating each immediately
183
+ # @return [Object]
184
+ def propagate_async_sequential
185
+ @nodes.each_index do |i|
186
+ sum = 0
187
+ @nodes.each_with_index { |node, j| sum += read_weight(i, j) * node }
188
+ @nodes[i] = sum > @threshold ? @active_node_value : @inactive_node_value
189
+ end
190
+ end
191
+
192
+ # Update all nodes simultaneously using previous state
193
+ # @return [Object]
194
+ def propagate_synchronous
195
+ new_nodes = Array.new(@nodes.length)
196
+ @nodes.each_index do |i|
197
+ sum = 0
198
+ @nodes.each_with_index { |node, j| sum += read_weight(i, j) * node }
199
+ new_nodes[i] = sum > @threshold ? @active_node_value : @inactive_node_value
200
+ end
201
+ @nodes = new_nodes
104
202
  end
105
-
203
+
106
204
  # Initialize all nodes with "inactive" state.
205
+ # @param data_set [Object]
206
+ # @return [Object]
107
207
  def initialize_nodes(data_set)
108
- @nodes = Array.new(data_set.data_items.first.length,
109
- @inactive_node_value)
208
+ @nodes = Array.new(data_set.data_items.first.length,
209
+ @inactive_node_value)
210
+ end
211
+
212
+ # Ensure training data only contains active or inactive values.
213
+ # @return [Object]
214
+ def validate_training_data
215
+ allowed = [@active_node_value, @inactive_node_value]
216
+ @data_set.data_items.each_with_index do |item, row|
217
+ item.each_with_index do |v, col|
218
+ raise ArgumentError, "Invalid value #{v} in item #{row}, position #{col}" unless allowed.include?(v)
219
+ end
220
+ end
110
221
  end
111
-
222
+
112
223
  # Create a partial weight matrix:
113
- # [
114
- # [w(1,0)],
224
+ # [
225
+ # [w(1,0)],
115
226
  # [w(2,0)], [w(2,1)],
116
227
  # [w(3,0)], [w(3,1)], [w(3,2)],
117
- # ...
228
+ # ...
118
229
  # [w(n-1,0)], [w(n-1,1)], [w(n-1,2)], ... , [w(n-1,n-2)]
119
230
  # ]
120
231
  # where n is the number of nodes.
121
- #
232
+ #
122
233
  # We are saving memory here, as:
123
- #
234
+ #
124
235
  # * w[i][i] = 0 (no node connects with itself)
125
236
  # * w[i][j] = w[j][i] (weights are symmetric)
126
- #
237
+ #
127
238
  # Use read_weight(i,j) to find out weight between node i and j
239
+ # @param data_set [Object]
240
+ # @return [Object]
128
241
  def initialize_weights(data_set)
129
- @weights = Array.new(@nodes.length-1) {|l| Array.new(l+1)}
242
+ patterns_count = data_set.data_items.length
243
+ scaling = @weight_scaling || (1.0 / patterns_count)
244
+ @weights = Array.new(@nodes.length - 1) { |l| Array.new(l + 1) }
130
245
  @nodes.each_index do |i|
131
246
  i.times do |j|
132
- @weights[i-1][j] = data_set.data_items.inject(0) { |sum, item| sum+= item[i]*item[j] }
247
+ sum = data_set.data_items.inject(0) { |s, item| s + (item[i] * item[j]) }
248
+ @weights[i - 1][j] = sum * scaling
133
249
  end
134
250
  end
135
251
  end
136
-
137
- # read_weight(i,j) reads the weigth matrix and returns weight between
252
+
253
+ # read_weight(i,j) reads the weigth matrix and returns weight between
138
254
  # node i and j
255
+ # @param index_a [Object]
256
+ # @param index_b [Object]
257
+ # @return [Object]
139
258
  def read_weight(index_a, index_b)
140
259
  return 0 if index_a == index_b
260
+
141
261
  index_a, index_b = index_b, index_a if index_b > index_a
142
- return @weights[index_a-1][index_b]
262
+ @weights[index_a - 1][index_b]
143
263
  end
144
-
145
264
  end
146
-
147
265
  end
148
-
149
266
  end
@@ -0,0 +1,194 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Minimal Transformer implementation
4
+ # Author:: OpenAI Assistant
5
+ # License:: MPL 1.1
6
+ # Project:: ai4r
7
+ # Url:: https://github.com/SergioFierens/ai4r
8
+
9
+ require_relative '../data/parameterizable'
10
+ require_relative 'activation_functions'
11
+
12
+ module Ai4r
13
+ module NeuralNetwork
14
+ # A tiny Transformer with embeddings, positional encoding,
15
+ # multi-head attention and a feed-forward layer. Depending on the
16
+ # architecture configuration it can operate as an encoder, decoder or
17
+ # encoder-decoder model. Weights are initialized randomly and the model is
18
+ # not trainable.
19
+ class Transformer
20
+ include Ai4r::Data::Parameterizable
21
+
22
+ parameters_info embed_dim: 'Embedding dimension.',
23
+ num_heads: 'Number of attention heads.',
24
+ ff_dim: 'Feed-forward hidden size.',
25
+ vocab_size: 'Vocabulary size.',
26
+ max_len: 'Maximum sequence length.',
27
+ architecture: 'Architecture (:encoder, :decoder or :seq2seq).',
28
+ seed: 'Deterministic random seed for initialization.'
29
+
30
+ # Initialize the Transformer with given hyperparameters.
31
+ def initialize(vocab_size:, max_len:, embed_dim: 8, num_heads: 2, ff_dim: 32,
32
+ architecture: :encoder, seed: nil)
33
+ @seed = seed
34
+ @rng = seed ? Random.new(seed) : Random.new
35
+ @vocab_size = vocab_size
36
+ @max_len = max_len
37
+ @embed_dim = embed_dim
38
+ @num_heads = num_heads
39
+ @ff_dim = ff_dim
40
+ @architecture = architecture
41
+ if embed_dim % num_heads != 0
42
+ raise ArgumentError,
43
+ 'embed_dim must be divisible by num_heads'
44
+ end
45
+ raise ArgumentError, 'invalid architecture' unless %i[encoder decoder seq2seq].include?(@architecture)
46
+
47
+ init_weights
48
+ build_positional_encoding
49
+ end
50
+
51
+ # Evaluate a sequence of integer token ids. Returns an array of
52
+ # length seq_len with embed_dim sized vectors.
53
+ def eval(*args)
54
+ case @architecture
55
+ when :encoder
56
+ tokens = args.first
57
+ raise ArgumentError, 'sequence too long' if tokens.length > @max_len
58
+
59
+ encode(tokens)
60
+ when :decoder
61
+ tokens = args.first
62
+ raise ArgumentError, 'sequence too long' if tokens.length > @max_len
63
+
64
+ decode(tokens)
65
+ when :seq2seq
66
+ src, tgt = args
67
+ raise ArgumentError, 'sequence too long' if src.length > @max_len || tgt.length > @max_len
68
+
69
+ memory = encode(src)
70
+ decode(tgt, memory)
71
+ else
72
+ raise ArgumentError, 'invalid architecture'
73
+ end
74
+ end
75
+
76
+ private
77
+
78
+ def encode(tokens)
79
+ x = tokens.map.with_index { |t, i| add(@token_embeddings[t], @positional[i]) }
80
+ x = multi_head_attention(x)
81
+ feed_forward(x)
82
+ end
83
+
84
+ def decode(tokens, memory = nil)
85
+ x = tokens.map.with_index { |t, i| add(@token_embeddings[t], @positional[i]) }
86
+ mask = causal_mask(x.length)
87
+ x = multi_head_attention(x, x, x, mask)
88
+ x = multi_head_attention(x, memory, memory) if memory
89
+ feed_forward(x)
90
+ end
91
+
92
+ def causal_mask(len)
93
+ Array.new(len) { |i| Array.new(len) { |j| j <= i } }
94
+ end
95
+
96
+ def head_dim
97
+ @embed_dim / @num_heads
98
+ end
99
+
100
+ def init_weights
101
+ @token_embeddings = Array.new(@vocab_size) { Array.new(@embed_dim) { @rng.rand * 2 - 1 } }
102
+ hd = head_dim
103
+ @heads = Array.new(@num_heads) do
104
+ {
105
+ q: Array.new(@embed_dim) { Array.new(hd) { @rng.rand * 2 - 1 } },
106
+ k: Array.new(@embed_dim) { Array.new(hd) { @rng.rand * 2 - 1 } },
107
+ v: Array.new(@embed_dim) { Array.new(hd) { @rng.rand * 2 - 1 } }
108
+ }
109
+ end
110
+ @wo = Array.new(@num_heads * hd) { Array.new(@embed_dim) { @rng.rand * 2 - 1 } }
111
+ @w1 = Array.new(@embed_dim) { Array.new(@ff_dim) { @rng.rand * 2 - 1 } }
112
+ @b1 = Array.new(@ff_dim, 0.0)
113
+ @w2 = Array.new(@ff_dim) { Array.new(@embed_dim) { @rng.rand * 2 - 1 } }
114
+ @b2 = Array.new(@embed_dim, 0.0)
115
+ end
116
+
117
+ def build_positional_encoding
118
+ @positional = Array.new(@max_len) do |pos|
119
+ Array.new(@embed_dim) do |i|
120
+ angle = pos / (10_000.0**((2 * (i / 2)) / @embed_dim.to_f))
121
+ i.even? ? Math.sin(angle) : Math.cos(angle)
122
+ end
123
+ end
124
+ end
125
+
126
+ def add(a, b)
127
+ a.each_index.map { |i| a[i] + b[i] }
128
+ end
129
+
130
+ def dot(a, b)
131
+ sum = 0.0
132
+ a.each_index { |i| sum += a[i] * b[i] }
133
+ sum
134
+ end
135
+
136
+ def matmul(mat, weights)
137
+ mat.map do |row|
138
+ weights.transpose.map { |w| dot(row, w) }
139
+ end
140
+ end
141
+
142
+ def softmax(vec)
143
+ m = vec.max
144
+ exps = vec.map { |v| Math.exp(v - m) }
145
+ sum = exps.inject(:+)
146
+ exps.map { |e| e / sum }
147
+ end
148
+
149
+ def multi_head_attention(q_in, k_in = nil, v_in = nil, mask = nil)
150
+ k_in ||= q_in
151
+ v_in ||= k_in
152
+ hd = head_dim
153
+ heads_out = @heads.map do |h|
154
+ q = matmul(q_in, h[:q])
155
+ k = matmul(k_in, h[:k])
156
+ v = matmul(v_in, h[:v])
157
+ scores = matmul(q, k.transpose)
158
+ scale = Math.sqrt(hd.to_f)
159
+ scores.each_index do |i|
160
+ scores[i].each_index do |j|
161
+ scores[i][j] /= scale
162
+ scores[i][j] = -1e9 if mask && !mask[i][j]
163
+ end
164
+ end
165
+ scores.map! { |row| softmax(row) }
166
+ matmul(scores, v)
167
+ end
168
+ concat = Array.new(q_in.length) { [] }
169
+ heads_out.each do |head|
170
+ head.each_index do |i|
171
+ concat[i].concat(head[i])
172
+ end
173
+ end
174
+ matmul(concat, @wo)
175
+ end
176
+
177
+ def relu(x)
178
+ x.positive? ? x : 0
179
+ end
180
+
181
+ def affine(mat, weights, bias)
182
+ mat.map do |row|
183
+ weights.transpose.map.with_index { |w, j| dot(row, w) + bias[j] }
184
+ end
185
+ end
186
+
187
+ def feed_forward(x)
188
+ h = affine(x, @w1, @b1)
189
+ h.map! { |row| row.map { |v| relu(v) } }
190
+ affine(h, @w2, @b2)
191
+ end
192
+ end
193
+ end
194
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Author:: Sergio Fierens
4
+ # License:: MPL 1.1
5
+ # Project:: ai4r
6
+ # Url:: https://github.com/SergioFierens/ai4r
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
10
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
11
+
12
+ module Ai4r
13
+ module NeuralNetwork
14
+ # Collection of common weight initialization strategies.
15
+ module WeightInitializations
16
+ # Uniform distribution in [-1, 1)
17
+ def uniform
18
+ ->(_n, _i, _j) { (rand * 2) - 1 }
19
+ end
20
+
21
+ # Xavier/Glorot initialization based on layer dimensions
22
+ def xavier(structure)
23
+ lambda do |layer, _i, _j|
24
+ limit = Math.sqrt(6.0 / (structure[layer] + structure[layer + 1]))
25
+ (rand * 2 * limit) - limit
26
+ end
27
+ end
28
+
29
+ # He initialization suitable for ReLU activations
30
+ def he(structure)
31
+ lambda do |layer, _i, _j|
32
+ limit = Math.sqrt(6.0 / structure[layer])
33
+ (rand * 2 * limit) - limit
34
+ end
35
+ end
36
+
37
+ module_function :uniform, :xavier, :he
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Author:: OpenAI Assistant
4
+ # License:: MPL 1.1
5
+ # Project:: ai4r
6
+ #
7
+ # Classical policy iteration for finite MDPs.
8
+
9
+ require_relative '../data/parameterizable'
10
+
11
+ module Ai4r
12
+ module Reinforcement
13
+ # Compute an optimal policy for a known MDP.
14
+ class PolicyIteration
15
+ include Ai4r::Data::Parameterizable
16
+
17
+ parameters_info discount: 'Discount factor'
18
+
19
+ def initialize
20
+ @discount = 0.9
21
+ end
22
+
23
+ # Perform policy iteration.
24
+ # states:: Array of states
25
+ # actions:: Array of actions
26
+ # transition:: Hash[state][action] => {next_state => prob}
27
+ # reward:: Hash[state][action] => reward
28
+ def policy_iteration(states, actions, transition, reward)
29
+ policy = {}
30
+ states.each { |s| policy[s] = actions.first }
31
+ values = Hash.new(0.0)
32
+
33
+ loop do
34
+ # Policy evaluation
35
+ delta = Float::INFINITY
36
+ while delta > 1e-6
37
+ delta = 0.0
38
+ states.each do |s|
39
+ v = values[s]
40
+ a = policy[s]
41
+ new_v = reward[s][a] +
42
+ @discount * transition[s][a].sum { |s2, p| p * values[s2] }
43
+ values[s] = new_v
44
+ diff = (v - new_v).abs
45
+ delta = diff if diff > delta
46
+ end
47
+ end
48
+
49
+ # Policy improvement
50
+ stable = true
51
+ states.each do |s|
52
+ old = policy[s]
53
+ best = actions.max_by do |a|
54
+ reward[s][a] +
55
+ @discount * transition[s][a].sum { |s2, p| p * values[s2] }
56
+ end
57
+ policy[s] = best
58
+ stable = false if best != old
59
+ end
60
+ break if stable
61
+ end
62
+ policy
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Author:: OpenAI Assistant
4
+ # License:: MPL 1.1
5
+ # Project:: ai4r
6
+ #
7
+ # Basic tabular Q-learning implementation.
8
+
9
+ require_relative '../data/parameterizable'
10
+
11
+ module Ai4r
12
+ module Reinforcement
13
+ # Simple Q-learning agent storing Q-values in a Hash.
14
+ class QLearning
15
+ include Ai4r::Data::Parameterizable
16
+
17
+ parameters_info learning_rate: 'Update step size',
18
+ discount: 'Discount factor',
19
+ exploration: 'Exploration rate'
20
+
21
+ def initialize
22
+ @learning_rate = 0.1
23
+ @discount = 0.9
24
+ @exploration = 0.1
25
+ @q = Hash.new { |h, k| h[k] = Hash.new(0.0) }
26
+ end
27
+
28
+ # Update Q(s,a) from an observed transition.
29
+ def update(state, action, reward, next_state)
30
+ best_next = @q[next_state].values.max || 0.0
31
+ @q[state][action] += @learning_rate * (
32
+ reward + @discount * best_next - @q[state][action]
33
+ )
34
+ end
35
+
36
+ # Choose an action using an ε-greedy strategy.
37
+ def choose_action(state)
38
+ return nil if @q[state].empty?
39
+
40
+ if rand < @exploration
41
+ @q[state].keys.sample
42
+ else
43
+ @q[state].max_by { |_, v| v }.first
44
+ end
45
+ end
46
+
47
+ # Direct access to learned Q-values.
48
+ attr_reader :q
49
+ end
50
+ end
51
+ end