ai4r 1.12 → 2.0

This diff shows the changes between publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
Files changed (128)
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.csv +159 -0
  16. data/examples/classifiers/simple_linear_regression_example.rb +18 -0
  17. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  18. data/examples/classifiers/zero_one_r_data.csv +8 -0
  19. data/examples/clusterers/clusterer_example.rb +62 -0
  20. data/examples/clusterers/dbscan_example.rb +17 -0
  21. data/examples/clusterers/dendrogram_example.rb +17 -0
  22. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  23. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  24. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  25. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  26. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  27. data/examples/neural_network/backpropagation_example.rb +49 -48
  28. data/examples/neural_network/hopfield_example.rb +45 -0
  29. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  30. data/examples/neural_network/patterns_with_noise.rb +41 -39
  31. data/examples/neural_network/train_epochs_callback.rb +25 -0
  32. data/examples/neural_network/training_patterns.rb +39 -39
  33. data/examples/neural_network/transformer_text_classification.rb +78 -0
  34. data/examples/neural_network/xor_example.rb +23 -22
  35. data/examples/reinforcement/q_learning_example.rb +10 -0
  36. data/examples/som/som_data.rb +155 -152
  37. data/examples/som/som_multi_node_example.rb +12 -13
  38. data/examples/som/som_single_example.rb +12 -15
  39. data/examples/transformer/decode_classifier_example.rb +68 -0
  40. data/examples/transformer/deterministic_example.rb +10 -0
  41. data/examples/transformer/seq2seq_example.rb +16 -0
  42. data/lib/ai4r/classifiers/classifier.rb +24 -16
  43. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  44. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  45. data/lib/ai4r/classifiers/ib1.rb +122 -32
  46. data/lib/ai4r/classifiers/id3.rb +527 -144
  47. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  48. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  49. data/lib/ai4r/classifiers/naive_bayes.rb +112 -48
  50. data/lib/ai4r/classifiers/one_r.rb +112 -44
  51. data/lib/ai4r/classifiers/prism.rb +167 -76
  52. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  53. data/lib/ai4r/classifiers/simple_linear_regression.rb +143 -0
  54. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  55. data/lib/ai4r/classifiers/votes.rb +57 -0
  56. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  57. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  58. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  59. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  60. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  61. data/lib/ai4r/clusterers/clusterer.rb +28 -24
  62. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  63. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  64. data/lib/ai4r/clusterers/diana.rb +75 -49
  65. data/lib/ai4r/clusterers/k_means.rb +309 -72
  66. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  67. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  68. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  69. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +63 -0
  70. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  71. data/lib/ai4r/data/data_set.rb +229 -100
  72. data/lib/ai4r/data/parameterizable.rb +31 -25
  73. data/lib/ai4r/data/proximity.rb +72 -50
  74. data/lib/ai4r/data/statistics.rb +46 -35
  75. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  76. data/lib/ai4r/experiment/split.rb +39 -0
  77. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  78. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  79. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  80. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  81. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  82. data/lib/ai4r/neural_network/backpropagation.rb +419 -143
  83. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  84. data/lib/ai4r/neural_network/transformer.rb +194 -0
  85. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  86. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  87. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  88. data/lib/ai4r/search/a_star.rb +76 -0
  89. data/lib/ai4r/search/bfs.rb +50 -0
  90. data/lib/ai4r/search/dfs.rb +50 -0
  91. data/lib/ai4r/search/mcts.rb +118 -0
  92. data/lib/ai4r/search.rb +12 -0
  93. data/lib/ai4r/som/distance_metrics.rb +29 -0
  94. data/lib/ai4r/som/layer.rb +28 -17
  95. data/lib/ai4r/som/node.rb +61 -32
  96. data/lib/ai4r/som/som.rb +158 -41
  97. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  98. data/lib/ai4r/version.rb +3 -0
  99. data/lib/ai4r.rb +58 -27
  100. metadata +117 -106
  101. data/README.rdoc +0 -44
  102. data/test/classifiers/hyperpipes_test.rb +0 -84
  103. data/test/classifiers/ib1_test.rb +0 -78
  104. data/test/classifiers/id3_test.rb +0 -208
  105. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  106. data/test/classifiers/naive_bayes_test.rb +0 -43
  107. data/test/classifiers/one_r_test.rb +0 -62
  108. data/test/classifiers/prism_test.rb +0 -85
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -100
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_test.rb +0 -53
  119. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  120. data/test/data/data_set_test.rb +0 -96
  121. data/test/data/proximity_test.rb +0 -81
  122. data/test/data/statistics_test.rb +0 -65
  123. data/test/experiment/classifier_evaluator_test.rb +0 -76
  124. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  125. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  126. data/test/neural_network/backpropagation_test.rb +0 -82
  127. data/test/neural_network/hopfield_test.rb +0 -72
  128. data/test/som/som_test.rb +0 -97
data/lib/ai4r/classifiers/simple_linear_regression.rb
@@ -0,0 +1,143 @@
+ # frozen_string_literal: true
+
+ # Author:: Malav Bhavsar
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: https://github.com/SergioFierens/ai4r
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ require_relative '../data/data_set'
+ require_relative 'classifier'
+
+ module Ai4r
+   module Classifiers
+     # = Introduction
+     #
+     # This is an implementation of a Simple Linear Regression Classifier.
+     #
+     # For further details regarding Bayes and Naive Bayes Classifier have a look at this link:
+     # http://en.wikipedia.org/wiki/Naive_Bayesian_classification
+     # http://en.wikipedia.org/wiki/Bayes%27_theorem
+     #
+     #
+     # = How to use it
+     #
+     # data = DataSet.new.parse_csv_with_labels "autoPrice.csv"
+     # c = SimpleLinearRegression.new.
+     #   build data
+     # c.eval([1,158,105.8,192.7,71.4,55.7,2844,136,3.19,3.4,8.5,110,5500,19,25])
+     #
+
+     # SimpleLinearRegression performs linear regression on one attribute.
+     class SimpleLinearRegression < Classifier
+       attr_reader :attribute, :attribute_index, :slope, :intercept
+
+       parameters_info selected_attribute: 'Index of attribute to use for regression.'
+
+       # @return [Object]
+       def initialize
+         super()
+         @attribute = nil
+         @attribute_index = 0
+         @slope = 0
+         @intercept = 0
+         @selected_attribute = nil
+       end
+
+       # You can evaluate new data, predicting its category.
+       # e.g.
+       #   c.eval([1,158,105.8,192.7,71.4,55.7,2844,136,3.19,3.4,8.5,110,5500,19,25])
+       #   => 11876.96774193548
+       # @param data [Object]
+       # @return [Object]
+       def eval(data)
+         @intercept + (@slope * data[@attribute_index])
+       end
+
+       # Gets the best attribute and does Linear Regression using it to find out the
+       # slope and intercept.
+       # Parameter data has to be an instance of DataSet
+       # @param data [Object]
+       # @return [Object]
+       def build(data)
+         validate_data(data)
+
+         y_mean = data.get_mean_or_mode[data.num_attributes - 1]
+         result = if @selected_attribute
+                    evaluate_attribute(data, @selected_attribute, y_mean)
+                  else
+                    evaluate_all_attributes(data, y_mean)
+                  end
+         assign_result(data, result)
+       end
+
+       def validate_data(data)
+         raise 'Error instance must be passed' unless data.is_a?(Ai4r::Data::DataSet)
+         raise 'Data should not be empty' if data.data_items.empty?
+       end
+
+       def evaluate_attribute(data, attr_index, y_mean)
+         x_mean = data.get_mean_or_mode[attr_index]
+         slope, x_diff_sq, y_diff_sq = attribute_sums(data, attr_index, x_mean, y_mean)
+         if x_diff_sq.zero?
+           { chosen: attr_index, slope: 0, intercept: y_mean, msq: Float::MAX }
+         else
+           chosen_slope = slope / x_diff_sq
+           intercept = y_mean - (chosen_slope * x_mean)
+           { chosen: attr_index, slope: chosen_slope, intercept: intercept, msq: y_diff_sq - (chosen_slope * slope) }
+         end
+       end
+
+       def evaluate_all_attributes(data, y_mean)
+         result = { chosen: -1, msq: Float::MAX }
+         data.data_labels.each do |attr_name|
+           attr_index = data.get_index attr_name
+           next if attr_index == data.num_attributes - 1
+
+           candidate = evaluate_attribute(data, attr_index, y_mean)
+           next unless candidate[:msq] < result[:msq]
+
+           result = candidate
+         end
+         result
+       end
+
+       def assign_result(data, result)
+         raise 'no useful attribute found' if result[:chosen] == -1
+
+         @attribute = data.data_labels[result[:chosen]]
+         @attribute_index = result[:chosen]
+         @slope = result[:slope]
+         @intercept = result[:intercept]
+         self
+       end
+
+       # Simple Linear Regression classifiers cannot generate human readable
+       # rules. This method returns a descriptive string indicating that rule
+       # extraction is not supported.
+       def get_rules
+         'SimpleLinearRegression does not support rule extraction.'
+       end
+
+       private
+
+       # Calculate regression sums for the given attribute.
+       def attribute_sums(data, attr_index, x_mean, y_mean)
+         slope = 0
+         sum_x_diff_squared = 0
+         sum_y_diff_squared = 0
+         data.data_items.each do |instance|
+           x_diff = instance[attr_index] - x_mean
+           y_diff = instance[data.num_attributes - 1] - y_mean
+           slope += x_diff * y_diff
+           sum_x_diff_squared += x_diff * x_diff
+           sum_y_diff_squared += y_diff * y_diff
+         end
+         [slope, sum_x_diff_squared, sum_y_diff_squared]
+       end
+     end
+   end
+ end
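A minimal usage sketch for the new SimpleLinearRegression classifier. The two-column data set and its labels are invented for illustration; the last attribute is treated as the numeric target, as the class expects.

    require 'ai4r'

    # Toy data: the last column is the numeric value to predict.
    items = [[2.0, 20.0], [4.0, 41.0], [6.0, 59.0], [8.0, 82.0]]
    data = Ai4r::Data::DataSet.new(data_items: items, data_labels: %w[size price])

    regression = Ai4r::Classifiers::SimpleLinearRegression.new.build(data)
    regression.attribute      # attribute picked for the fit ("size" here)
    regression.slope          # fitted slope
    regression.eval([10.0])   # predicted price for size = 10.0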
data/lib/ai4r/classifiers/support_vector_machine.rb
@@ -0,0 +1,91 @@
+ # frozen_string_literal: true
+
+ # Author:: OpenAI Assistant
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: https://github.com/SergioFierens/ai4r
+ #
+ # A minimal linear Support Vector Machine implementation using
+ # stochastic gradient descent. This implementation is intentionally
+ # simple and only supports binary classification with numeric
+ # attributes.
+
+ require_relative '../data/data_set'
+ require_relative 'classifier'
+
+ module Ai4r
+   module Classifiers
+     # A lightweight linear SVM classifier trained via gradient descent.
+     # Only two classes are supported. Predictions return the same class
+     # labels used in the training data.
+     class SupportVectorMachine < Classifier
+       attr_reader :weights, :bias, :classes
+
+       parameters_info learning_rate: 'Learning rate for gradient descent.',
+                       iterations: 'Training iterations.',
+                       c: 'Regularization strength.'
+
+       def initialize
+         super()
+         @learning_rate = 0.01
+         @iterations = 1000
+         @c = 1.0
+         @weights = []
+         @bias = 0.0
+         @classes = []
+       end
+
+       # Train the SVM using the provided DataSet. Only numeric attributes and
+       # exactly two classes are supported.
+       def build(data_set)
+         data_set.check_not_empty
+         @classes = data_set.build_domains.last.to_a
+         raise ArgumentError, 'SVM only supports two classes' unless @classes.size == 2
+
+         num_features = data_set.data_labels.length - 1
+         @weights = Array.new(num_features, 0.0)
+         @bias = 0.0
+
+         samples = data_set.data_items.map do |row|
+           [row[0...-1].map(&:to_f), row.last]
+         end
+
+         @iterations.times do
+           samples.each do |features, label|
+             y = label == @classes[0] ? 1.0 : -1.0
+             prediction = dot(@weights, features) + @bias
+             if y * prediction < 1
+               @weights.map!.with_index do |w, i|
+                 w + (@learning_rate * ((@c * y * features[i]) - (2 * w)))
+               end
+               @bias += @learning_rate * @c * y
+             else
+               @weights.map!.with_index { |w, _i| w - (@learning_rate * 2 * w) }
+             end
+           end
+         end
+         self
+       end
+
+       # Predict the class for the given numeric feature vector.
+       def eval(data)
+         score = dot(@weights, data.map(&:to_f)) + @bias
+         score >= 0 ? @classes[0] : @classes[1]
+       end
+
+       # Support Vector Machine classifiers cannot generate human readable rules.
+       # This method returns a string indicating rule extraction is unsupported.
+       def get_rules
+         'SupportVectorMachine does not support rule extraction.'
+       end
+
+       private
+
+       def dot(a, b)
+         sum = 0.0
+         a.each_index { |i| sum += a[i] * b[i] }
+         sum
+       end
+     end
+   end
+ end
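A hedged usage sketch for the new SupportVectorMachine. The four training rows and the parameter values are invented, and set_parameters is assumed to be the generic setter that ai4r's Parameterizable mixin provides for anything declared with parameters_info.

    require 'ai4r'

    # Two invented, well-separated groups with numeric attributes; the
    # last column holds the class label.
    items = [
      [1.0, 1.2, 'a'], [1.5, 0.8, 'a'],
      [8.0, 9.0, 'b'], [9.0, 8.5, 'b']
    ]
    data = Ai4r::Data::DataSet.new(data_items: items, data_labels: %w[x y class])

    svm = Ai4r::Classifiers::SupportVectorMachine.new
    svm.set_parameters(learning_rate: 0.01, iterations: 500, c: 1.0)
    svm.build(data)

    svm.eval([1.2, 1.0])   # expected to land on the 'a' side of the hyperplane
    svm.eval([8.5, 8.7])   # expected to land on the 'b' side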
data/lib/ai4r/classifiers/votes.rb
@@ -0,0 +1,57 @@
+ # frozen_string_literal: true
+
+ # Author:: Will Warner
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: https://github.com/SergioFierens/ai4r
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ module Ai4r
+   module Classifiers
+     # Simple vote counter used by ensemble methods.
+     class Votes
+       # @return [Object]
+       def initialize
+         self.tally_sheet = Hash.new(0)
+       end
+
+       # @param category [Object]
+       # @return [Object]
+       def increment_category(category)
+         tally_sheet[category] += 1
+       end
+
+       # @param category [Object]
+       # @return [Object]
+       def tally_for(category)
+         tally_sheet[category]
+       end
+
+       # @param tie_break [Object]
+       # @return [Object]
+       def get_winner(tie_break = :last, rng: Random.new)
+         n = 0 # used to create a stable sort of the tallys
+         sorted_sheet = tally_sheet.sort_by do |_, score|
+           n += 1
+           [score, n]
+         end
+         return nil if sorted_sheet.empty?
+
+         if tie_break == :random
+           max_score = sorted_sheet.last[1]
+           tied = sorted_sheet.select { |_, score| score == max_score }.map(&:first)
+           tied.sample(random: rng)
+         else
+           sorted_sheet.last.first
+         end
+       end
+
+       private
+
+       attr_accessor :tally_sheet
+     end
+   end
+ end
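Votes is self-contained, so its behaviour can be shown directly; the category names below are invented.

    require 'ai4r'

    votes = Ai4r::Classifiers::Votes.new
    votes.increment_category('spam')
    votes.increment_category('spam')
    votes.increment_category('ham')

    votes.tally_for('spam')   # => 2
    votes.get_winner          # => "spam" (highest tally wins)
    # Ties can be broken at random with a seeded generator:
    votes.get_winner(:random, rng: Random.new(42))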
data/lib/ai4r/classifiers/zero_r.rb
@@ -1,73 +1,114 @@
+ # frozen_string_literal: true
+
  # Author:: Sergio Fierens (Implementation only)
  # License:: MPL 1.1
  # Project:: ai4r
- # Url:: http://ai4r.org/
+ # Url:: https://github.com/SergioFierens/ai4r
  #
- # You can redistribute it and/or modify it under the terms of
- # the Mozilla Public License version 1.1 as published by the
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

- require File.dirname(__FILE__) + '/../data/data_set.rb'
- require File.dirname(__FILE__) + '/../classifiers/classifier'
+ require_relative '../data/data_set'
+ require_relative '../classifiers/classifier'

  module Ai4r
    module Classifiers
-
      # = Introduction
-     #
-     # The idea behind the ZeroR classifier is to identify the
-     # the most common class value in the training set.
-     # It always returns that value when evaluating an instance.
-     # It is frequently used as a baseline for evaluating other machine learning
+     #
+     # The idea behind the ZeroR classifier is to identify the
+     # the most common class value in the training set.
+     # It always returns that value when evaluating an instance.
+     # It is frequently used as a baseline for evaluating other machine learning
      # algorithms.
      class ZeroR < Classifier
-
        attr_reader :data_set, :class_value
-
+
+       parameters_info default_class: 'Return this value when the provided ' \
+                                      'dataset is empty.',
+                       tie_break: 'Strategy used when more than one class has the ' \
+                                  'same maximal frequency. Valid values are :first (default) ' \
+                                  'and :random.',
+                       random_seed: 'Seed for tie resolution when using :random strategy.'
+
+       # @return [Object]
+       def initialize
+         super()
+         @default_class = nil
+         @tie_break = :first
+         @random_seed = nil
+         @rng = nil
+       end
+
        # Build a new ZeroR classifier. You must provide a DataSet instance
-       # as parameter. The last attribute of each item is considered as
+       # as parameter. The last attribute of each item is considered as
        # the item class.
+       # @param data_set [Object]
+       # @return [Object]
        def build(data_set)
-         data_set.check_not_empty
          @data_set = data_set
-         frequencies = {}
+
+         if @data_set.data_items.empty?
+           @class_value = @default_class
+           return self
+         end
+
+         frequencies = Hash.new(0)
          max_freq = 0
-         @class_value = nil
+         tied_classes = []
+
          @data_set.data_items.each do |example|
           class_value = example.last
-           frequencies[class_value] = frequencies[class_value].nil? ? 1 : frequencies[class_value] + 1
+           frequencies[class_value] += 1
           class_frequency = frequencies[class_value]
-           if max_freq < class_frequency
+           if class_frequency > max_freq
             max_freq = class_frequency
-             @class_value = class_value
+             tied_classes = [class_value]
+           elsif class_frequency == max_freq && !tied_classes.include?(class_value)
+             tied_classes << class_value
           end
         end
-         return self
+
+         rng = @rng || (@random_seed.nil? ? Random.new : Random.new(@random_seed))
+
+         @class_value = if tied_classes.length == 1
+                          tied_classes.first
+                        else
+                          case @tie_break
+                          when :random
+                            tied_classes.sample(random: rng)
+                          else
+                            tied_classes.first
+                          end
+                        end
+
+         self
        end
-
+
        # You can evaluate new data, predicting its class.
        # e.g.
        # classifier.eval(['New York', '<30', 'F']) # => 'Y'
-       def eval(data)
+       # @param data [Object]
+       # @return [Object]
+       def eval(_data)
          @class_value
        end
-
+
        # This method returns the generated rules in ruby code.
        # e.g.
-       #
+       #
        # classifier.get_rules
        # # => marketing_target='Y'
        #
-       # It is a nice way to inspect induction results, and also to execute them:
+       # It is a nice way to inspect induction results, and also to execute them:
        # marketing_target = nil
-       # eval classifier.get_rules
+       # eval classifier.get_rules
        # puts marketing_target
        # # => 'Y'
+       # @return [Object]
        def get_rules
-         return "#{@data_set.data_labels.last} = '#{@class_value}'"
+         "#{@data_set.category_label} = '#{@class_value}'"
        end
-
      end
-
    end
  end
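The reworked ZeroR now takes parameters for empty data sets and tie handling. A sketch with an invented, deliberately tied data set; set_parameters is assumed to be the setter provided by ai4r's Parameterizable mixin.

    require 'ai4r'

    # 'Y' and 'N' each appear twice, so the majority class is a tie.
    items = [['New York', '<30', 'Y'], ['Chicago', '<30', 'N'],
             ['New York', '[30-50)', 'Y'], ['Chicago', '[50-80]', 'N']]
    data = Ai4r::Data::DataSet.new(data_items: items,
                                   data_labels: %w[city age marketing_target])

    zero_r = Ai4r::Classifiers::ZeroR.new
    zero_r.set_parameters(tie_break: :random, random_seed: 1)
    zero_r.build(data)

    zero_r.eval(['Boston', '<30'])   # tie resolved by the seeded RNG
    zero_r.get_rules                 # => "marketing_target = 'Y'" or "... = 'N'"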
data/lib/ai4r/clusterers/average_linkage.rb
@@ -1,59 +1,78 @@
+ # frozen_string_literal: true
+
  # Author:: Sergio Fierens (implementation)
  # License:: MPL 1.1
  # Project:: ai4r
- # Url:: http://ai4r.org/
+ # Url:: https://github.com/SergioFierens/ai4r
  #
- # You can redistribute it and/or modify it under the terms of
- # the Mozilla Public License version 1.1 as published by the
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt

- require File.dirname(__FILE__) + '/../data/data_set'
- require File.dirname(__FILE__) + '/../clusterers/single_linkage'
+ require_relative '../data/data_set'
+ require_relative '../clusterers/single_linkage'
+ require_relative '../clusterers/cluster_tree'

  module Ai4r
    module Clusterers
-
      # Implementation of a Hierarchical clusterer with group average
-     # linkage, AKA unweighted pair group method average or UPGMA (Everitt
+     # linkage, AKA unweighted pair group method average or UPGMA (Everitt
      # et al., 2001 ; Jain and Dubes, 1988 ; Sokal and Michener, 1958).
-     # Hierarchical clusteres create one cluster per element, and then
+     # Hierarchical clusterer create one cluster per element, and then
      # progressively merge clusters, until the required number of clusters
      # is reached.
-     # With average linkage, the distance between a clusters cx and
+     # With average linkage, the distance between a clusters cx and
      # cluster (ci U cj) the the average distance between cx and ci, and
      # cx and cj.
      #
      # D(cx, (ci U cj) = (D(cx, ci) + D(cx, cj)) / 2
      class AverageLinkage < SingleLinkage
-
-       parameters_info :distance_function =>
-         "Custom implementation of distance function. " +
-         "It must be a closure receiving two data items and return the " +
-         "distance bewteen them. By default, this algorithm uses " +
-         "ecuclidean distance of numeric attributes to the power of 2."
-
+       include ClusterTree
+
+       parameters_info distance_function:
+                         'Custom implementation of distance function. ' \
+                         'It must be a closure receiving two data items and return the ' \
+                         'distance between them. By default, this algorithm uses ' \
+                         'euclidean distance of numeric attributes to the power of 2.'
+
        # Build a new clusterer, using data examples found in data_set.
        # Items will be clustered in "number_of_clusters" different
        # clusters.
-       def build(data_set, number_of_clusters)
+       # @param data_set [Object]
+       # @param number_of_clusters [Object]
+       # @param *options [Object]
+       # @return [Object]
+       def build(data_set, number_of_clusters = 1, **options)
          super
        end
-
-       # This algorithms does not allow classification of new data items
+
+       # This algorithms does not allow classification of new data items
        # once it has been built. Rebuild the cluster including you data element.
-       def eval(data_item)
-         Raise "Eval of new data is not supported by this algorithm."
+       # @param _data_item [Object]
+       # @return [Object]
+       def eval(_data_item)
+         raise NotImplementedError, 'Eval of new data is not supported by this algorithm.'
        end
-
+
+       # Average linkage builds a dendrogram and cannot classify new data
+       # once built.
+       # @return [Object]
+       def supports_eval?
+         false
+       end
+
        protected
-
+
        # return distance between cluster cx and cluster (ci U cj),
        # using average linkage
-       def linkage_distance(cx, ci, cj)
-         (read_distance_matrix(cx, ci)+
-           read_distance_matrix(cx, cj))/2
+       # @param cx [Object]
+       # @param ci [Object]
+       # @param cj [Object]
+       # @return [Object]
+       def linkage_distance(cluster_x, cluster_i, cluster_j)
+         (read_distance_matrix(cluster_x, cluster_i) +
+          read_distance_matrix(cluster_x, cluster_j)) / 2
        end
-
      end
    end
  end
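Finally, a sketch of the updated AverageLinkage clusterer. The six points are invented, and the clusters reader is assumed to be the one inherited from SingleLinkage; with that assumption, the call below groups the points into two clusters.

    require 'ai4r'

    points = [[1, 1], [1, 2], [2, 1], [9, 9], [9, 8], [8, 9]]
    data = Ai4r::Data::DataSet.new(data_items: points, data_labels: %w[x y])

    clusterer = Ai4r::Clusterers::AverageLinkage.new.build(data, 2)
    clusterer.clusters.each_with_index do |cluster, i|
      puts "Cluster #{i}: #{cluster.data_items.inspect}"
    end

    clusterer.supports_eval?   # => false; eval now raises NotImplementedError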