ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
@@ -1,74 +1,85 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens (implementation)
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
- require File.dirname(__FILE__) + '/../data/data_set'
11
- require File.dirname(__FILE__) + '/../data/proximity'
12
- require File.dirname(__FILE__) + '/../clusterers/clusterer'
12
+ require_relative '../data/data_set'
13
+ require_relative '../data/proximity'
14
+ require_relative '../clusterers/clusterer'
13
15
 
14
16
  module Ai4r
15
17
  module Clusterers
16
-
17
- # DIANA (Divisive ANAlysis) (Kaufman and Rousseeuw, 1990;
18
+ # DIANA (Divisive ANAlysis) (Kaufman and Rousseeuw, 1990;
18
19
  # Macnaughton-Smith et al. 1964) is a Divisive Hierarchical
19
20
  # Clusterer. It begins with only one cluster with all data items,
20
21
  # and divides the clusters until the desired clusters number is reached.
21
22
  class Diana < Clusterer
22
-
23
23
  attr_reader :data_set, :number_of_clusters, :clusters
24
-
25
- parameters_info :distance_function =>
26
- "Custom implementation of distance function. " +
27
- "It must be a closure receiving two data items and return the " +
28
- "distance between them. By default, this algorithm uses " +
29
- "euclidean distance of numeric attributes to the power of 2."
30
-
24
+
25
+ parameters_info distance_function:
26
+ 'Custom implementation of distance function. ' \
27
+ 'It must be a closure receiving two data items and return the ' \
28
+ 'distance between them. By default, this algorithm uses ' \
29
+ 'euclidean distance of numeric attributes to the power of 2.'
30
+
31
+ # @return [Object]
31
32
  def initialize
32
- @distance_function = lambda do |a,b|
33
- Ai4r::Data::Proximity.squared_euclidean_distance(
34
- a.select {|att_a| att_a.is_a? Numeric} ,
35
- b.select {|att_b| att_b.is_a? Numeric})
36
- end
33
+ super()
34
+ @distance_function = lambda do |a, b|
35
+ Ai4r::Data::Proximity.squared_euclidean_distance(
36
+ a.select { |att_a| att_a.is_a? Numeric },
37
+ b.select { |att_b| att_b.is_a? Numeric }
38
+ )
39
+ end
37
40
  end
38
-
41
+
39
42
  # Build a new clusterer, using divisive analysis (DIANA algorithm)
43
+ # @param data_set [Object]
44
+ # @param number_of_clusters [Object]
45
+ # @return [Object]
40
46
  def build(data_set, number_of_clusters)
41
47
  @data_set = data_set
42
48
  @number_of_clusters = number_of_clusters
43
- @clusters = [@data_set[0..-1]]
44
-
45
- while(@clusters.length < @number_of_clusters)
49
+ @clusters = [@data_set]
50
+
51
+ while @clusters.length < @number_of_clusters
46
52
  cluster_index_to_split = max_diameter_cluster(@clusters)
47
53
  cluster_to_split = @clusters[cluster_index_to_split]
48
54
  splinter_cluster = init_splinter_cluster(cluster_to_split)
49
- while true
55
+ loop do
50
56
  dist_diff, index = max_distance_difference(cluster_to_split, splinter_cluster)
51
- break if dist_diff < 0
57
+ break if dist_diff.negative?
58
+
52
59
  splinter_cluster << cluster_to_split.data_items[index]
53
60
  cluster_to_split.data_items.delete_at(index)
54
61
  end
55
62
  @clusters << splinter_cluster
56
63
  end
57
-
58
- return self
64
+
65
+ self
59
66
  end
60
-
61
- # Classifies the given data item, returning the cluster index it belongs
67
+
68
+ # Classifies the given data item, returning the cluster index it belongs
62
69
  # to (0-based).
70
+ # @param data_item [Object]
71
+ # @return [Object]
63
72
  def eval(data_item)
64
73
  get_min_index(@clusters.collect do |cluster|
65
74
  distance_sum(data_item, cluster) / cluster.data_items.length
66
- end)
75
+ end)
67
76
  end
68
-
77
+
69
78
  protected
70
-
79
+
71
80
  # Return the index of the cluster with max diameter
81
+ # @param clusters [Object]
82
+ # @return [Object]
72
83
  def max_diameter_cluster(clusters)
73
84
  max_index = 0
74
85
  max_diameter = 0
@@ -79,10 +90,12 @@ module Ai4r
79
90
  max_diameter = diameter
80
91
  end
81
92
  end
82
- return max_index
93
+ max_index
83
94
  end
84
-
95
+
85
96
  # Max distance between 2 items in a cluster
97
+ # @param cluster [Object]
98
+ # @return [Object]
86
99
  def cluster_diameter(cluster)
87
100
  diameter = 0
88
101
  cluster.data_items.each_with_index do |item_a, item_a_pos|
@@ -91,49 +104,62 @@ module Ai4r
91
104
  diameter = d if d > diameter
92
105
  end
93
106
  end
94
- return diameter
107
+ diameter
95
108
  end
96
-
109
+
97
110
  # Create a cluster with the item with max distance
98
111
  # to the rest of the cluster's items.
99
112
  # That item is removed from the initial cluster.
113
+ # @param cluster_to_split [Object]
114
+ # @return [Object]
100
115
  def init_splinter_cluster(cluster_to_split)
101
116
  max = 0.0
102
117
  max_index = 0
103
118
  cluster_to_split.data_items.each_with_index do |item, index|
104
119
  sum = distance_sum(item, cluster_to_split)
105
- max, max_index = sum, index if sum > max
120
+ if sum > max
121
+ max = sum
122
+ max_index = index
123
+ end
106
124
  end
107
125
  splinter_cluster = cluster_to_split[max_index]
108
126
  cluster_to_split.data_items.delete_at(max_index)
109
- return splinter_cluster
127
+ splinter_cluster
110
128
  end
111
-
112
- # Return the max average distance between any item of
129
+
130
+ # Return the max average distance between any item of
113
131
  # cluster_to_split and the rest of items in that cluster,
114
132
  # minus the average distance with the items of splinter_cluster,
115
133
  # and the index of the item.
116
134
  # A positive value means that the item is closer to the
117
135
  # splinter group than to its current cluster.
136
+ # @param cluster_to_split [Object]
137
+ # @param splinter_cluster [Object]
138
+ # @return [Object]
118
139
  def max_distance_difference(cluster_to_split, splinter_cluster)
119
- max_diff = -1.0/0
140
+ max_diff = -Float::INFINITY
120
141
  max_diff_index = 0
121
142
  cluster_to_split.data_items.each_with_index do |item, index|
122
- dist_a = distance_sum(item, cluster_to_split) / (cluster_to_split.data_items.length-1)
123
- dist_b = distance_sum(item, splinter_cluster) / (splinter_cluster.data_items.length)
143
+ dist_a = distance_sum(item, cluster_to_split) / (cluster_to_split.data_items.length - 1)
144
+ dist_b = distance_sum(item, splinter_cluster) / splinter_cluster.data_items.length
124
145
  dist_diff = dist_a - dist_b
125
- max_diff, max_diff_index = dist_diff, index if dist_diff > max_diff
146
+ if dist_diff > max_diff
147
+ max_diff = dist_diff
148
+ max_diff_index = index
149
+ end
126
150
  end
127
- return max_diff, max_diff_index
151
+ [max_diff, max_diff_index]
128
152
  end
129
-
153
+
130
154
  # Sum up the distance between an item and all the items in a cluster
155
+ # @param item_a [Object]
156
+ # @param cluster [Object]
157
+ # @return [Object]
131
158
  def distance_sum(item_a, cluster)
132
159
  cluster.data_items.inject(0.0) do |sum, item_b|
133
160
  sum + @distance_function.call(item_a, item_b)
134
161
  end
135
162
  end
136
-
137
163
  end
138
164
  end
139
165
  end