ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
@@ -1,21 +1,22 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens (Implementation only)
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
12
  require 'set'
11
- require File.dirname(__FILE__) + '/../data/data_set'
12
- require File.dirname(__FILE__) + '/../classifiers/classifier'
13
+ require_relative '../data/data_set'
14
+ require_relative '../classifiers/classifier'
13
15
 
14
16
  module Ai4r
15
17
  module Classifiers
16
-
17
18
  # = Introduction
18
- #
19
+ #
19
20
  # IB1 algorithm implementation.
20
21
  # IB1 is the simplest instance-based learning (IBL) algorithm.
21
22
  #
@@ -26,45 +27,126 @@ module Ai4r
26
27
  # it normalizes its attributes' ranges, processes instances
27
28
  # incrementally, and has a simple policy for tolerating missing values
28
29
  class IB1 < Classifier
29
-
30
- attr_reader :data_set
30
+ attr_reader :data_set, :min_values, :max_values
31
+
32
+ parameters_info k: 'Number of nearest neighbors to consider. Default is 1.',
33
+ distance_function:
34
+ 'Optional custom distance metric taking two instances.',
35
+ tie_break:
36
+ 'Strategy used when neighbors vote tie. ' \
37
+ 'Valid values are :first (default) and :random.',
38
+ random_seed:
39
+ 'Seed for random tie-breaking when :tie_break is :random.'
40
+
41
+ # @return [Object]
42
+ def initialize
43
+ super()
44
+ @k = 1
45
+ @distance_function = nil
46
+ @tie_break = :first
47
+ @random_seed = nil
48
+ @rng = nil
49
+ end
31
50
 
32
51
  # Build a new IB1 classifier. You must provide a DataSet instance
33
- # as parameter. The last attribute of each item is considered as
52
+ # as parameter. The last attribute of each item is considered as
34
53
  # the item class.
54
+ # @param data_set [Object]
55
+ # @return [Object]
35
56
  def build(data_set)
36
57
  data_set.check_not_empty
37
58
  @data_set = data_set
38
59
  @min_values = Array.new(data_set.data_labels.length)
39
60
  @max_values = Array.new(data_set.data_labels.length)
40
61
  data_set.data_items.each { |data_item| update_min_max(data_item[0...-1]) }
41
- return self
62
+ self
63
+ end
64
+
65
+ # Append a new instance to the internal dataset. The last element is
66
+ # considered the class label. Minimum and maximum values for numeric
67
+ # attributes are updated so that future distance calculations remain
68
+ # normalized.
69
+ # @param data_item [Object]
70
+ # @return [Object]
71
+ def add_instance(data_item)
72
+ @data_set << data_item
73
+ update_min_max(data_item[0...-1])
74
+ self
42
75
  end
43
-
76
+
44
77
  # You can evaluate new data, predicting its class.
45
78
  # e.g.
46
- # classifier.eval(['New York', '<30', 'F']) # => 'Y'
79
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
80
+ #
81
+ # Evaluation does not update internal statistics, keeping the
82
+ # classifier state unchanged. Use +update_with_instance+ to
83
+ # incorporate new samples.
47
84
  def eval(data)
48
- update_min_max(data)
49
- min_distance = 1.0/0
50
- klass = nil
51
- @data_set.data_items.each do |train_item|
52
- d = distance(data, train_item)
53
- if d < min_distance
54
- min_distance = d
55
- klass = train_item.last
56
- end
85
+ neighbors = @data_set.data_items.map do |train_item|
86
+ [distance(data, train_item), train_item.last]
87
+ end
88
+ neighbors.sort_by! { |d, _| d }
89
+ k_limit = [@k, @data_set.data_items.length].min
90
+ k_neighbors = neighbors.first(k_limit)
91
+
92
+ # Include any other neighbors tied with the last selected distance
93
+ last_distance = k_neighbors.last[0]
94
+ neighbors[k_limit..].to_a.each do |dist, klass|
95
+ break if dist > last_distance
96
+
97
+ k_neighbors << [dist, klass]
57
98
  end
58
- return klass
99
+
100
+ counts = Hash.new(0)
101
+ k_neighbors.each { |(_dist, klass)| counts[klass] += 1 }
102
+ max_votes = counts.values.max
103
+ tied = counts.select { |_, v| v == max_votes }.keys
104
+
105
+ return tied.first if tied.length == 1
106
+
107
+ rng = @rng || (@random_seed.nil? ? Random.new : Random.new(@random_seed))
108
+
109
+ case @tie_break
110
+ when :random
111
+ tied.sample(random: rng)
112
+ else
113
+ k_neighbors.each { |(_dist, klass)| return klass if tied.include?(klass) }
114
+ end
115
+ end
116
+
117
+ # Returns an array with the +k_neighbors+ nearest instances from the training set
118
+ # for the given +data+ item. The returned elements are the training data
119
+ # rows themselves, ordered from the closest to the furthest.
120
+ # @param data [Object]
121
+ # @param k_neighbors [Object]
122
+ # @return [Object]
123
+ def neighbors_for(data, k_neighbors)
124
+ update_min_max(data)
125
+ @data_set.data_items
126
+ .map { |train_item| [train_item, distance(data, train_item)] }
127
+ .sort_by(&:last)
128
+ .first(k_neighbors)
129
+ .map(&:first)
130
+ end
131
+
132
+ # Update min/max values with the provided instance attributes. If
133
+ # +learn+ is true, also append the instance to the training set so the
134
+ # classifier learns incrementally.
135
+ def update_with_instance(data_item, learn: false)
136
+ update_min_max(data_item[0...-1])
137
+ @data_set << data_item if learn
138
+ self
59
139
  end
60
-
140
+
61
141
  protected
62
142
 
63
143
  # We keep in the state the min and max value of each attribute,
64
144
  # to provide normalized distances between two values of a numeric attribute
145
+ # @param atts [Object]
146
+ # @return [Object]
65
147
  def update_min_max(atts)
66
148
  atts.each_with_index do |att, i|
67
- if att && att.is_a?(Numeric)
149
+ if att.is_a?(Numeric)
68
150
  @min_values[i] = att if @min_values[i].nil? || @min_values[i] > att
69
151
  @max_values[i] = att if @max_values[i].nil? || @max_values[i] < att
70
152
  end
@@ -80,10 +162,15 @@ module Ai4r
80
162
  # * 1 if both atts are missing
81
163
  # * normalized numeric att value if other att value is missing and > 0.5
82
164
  # * 1.0-normalized numeric att value if other att value is missing and < 0.5
83
- def distance(a, b)
165
+ # @param a [Object]
166
+ # @param b [Object]
167
+ # @return [Object]
168
+ def distance(data_a, data_b)
169
+ return @distance_function.call(data_a, data_b) if @distance_function
170
+
84
171
  d = 0
85
- a.each_with_index do |att_a, i|
86
- att_b = b[i]
172
+ data_a.each_with_index do |att_a, i|
173
+ att_b = data_b[i]
87
174
  if att_a.nil?
88
175
  if att_b.is_a? Numeric
89
176
  diff = norm(att_b, i)
@@ -93,7 +180,7 @@ module Ai4r
93
180
  end
94
181
  elsif att_a.is_a? Numeric
95
182
  if att_b.is_a? Numeric
96
- diff = norm(att_a, i) - norm(att_b, i);
183
+ diff = norm(att_a, i) - norm(att_b, i)
97
184
  else
98
185
  diff = norm(att_a, i)
99
186
  diff = 1.0 - diff if diff < 0.5
@@ -105,17 +192,20 @@ module Ai4r
105
192
  end
106
193
  d += diff * diff
107
194
  end
108
- return d
195
+ d
109
196
  end
110
197
 
111
198
  # Returns normalized value att
112
199
  #
113
200
  # index is the index of the attribute in the instance.
201
+ # @param att [Object]
202
+ # @param index [Object]
203
+ # @return [Object]
114
204
  def norm(att, index)
115
205
  return 0 if @min_values[index].nil?
116
- return 1.0*(att - @min_values[index]) / (@max_values[index] -@min_values[index]);
206
+
207
+ 1.0 * (att - @min_values[index]) / (@max_values[index] - @min_values[index])
117
208
  end
118
-
119
209
  end
120
210
  end
121
211
  end