ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Author:: OpenAI Assistant
4
+ # License:: MPL 1.1
5
+ # Project:: ai4r
6
+ # Url:: https://github.com/SergioFierens/ai4r
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
10
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
11
+
12
+ require_relative '../data/data_set'
13
+ require_relative 'classifier'
14
+
15
+ module Ai4r
16
+ module Classifiers
17
+ # Implementation of binary Logistic Regression using gradient descent.
18
+ #
19
+ # Training data must have numeric attributes with the last attribute being
20
+ # the class label (0 or 1). Parameters can be adjusted with
21
+ # {Parameterizable#set_parameters}.
22
+ #
23
+ # Example:
24
+ # data = Ai4r::Data::DataSet.new(:data_items => [[0.2, 1], [0.4, 0]])
25
+ # classifier = LogisticRegression.new.build(data)
26
+ # classifier.eval([0.3])
27
+ class LogisticRegression < Classifier
28
+ attr_reader :weights
29
+
30
+ parameters_info learning_rate: 'Learning rate for gradient descent.',
31
+ iterations: 'Number of iterations to train.'
32
+
33
+ def initialize
34
+ super()
35
+ @learning_rate = 0.1
36
+ @iterations = 1000
37
+ @weights = nil
38
+ end
39
+
40
+ # Train the logistic regression classifier using the provided dataset.
41
+ def build(data_set)
42
+ raise 'Error instance must be passed' unless data_set.is_a?(Ai4r::Data::DataSet)
43
+
44
+ data_set.check_not_empty
45
+
46
+ x = data_set.data_items.map { |item| item[0...-1].map(&:to_f) }
47
+ y = data_set.data_items.map { |item| item.last.to_f }
48
+ m = x.length
49
+ n = x.first.length
50
+ @weights = Array.new(n + 1, 0.0) # last value is bias
51
+
52
+ @iterations.times do
53
+ predictions = x.map do |row|
54
+ z = row.each_with_index.inject(@weights.last) { |s, (v, j)| s + (v * @weights[j]) }
55
+ 1.0 / (1.0 + Math.exp(-z))
56
+ end
57
+ errors = predictions.zip(y).map { |p, label| p - label }
58
+
59
+ n.times do |j|
60
+ grad = (0...m).inject(0.0) { |sum, i| sum + (errors[i] * x[i][j]) } / m
61
+ @weights[j] -= @learning_rate * grad
62
+ end
63
+ bias_grad = errors.sum / m
64
+ @weights[n] -= @learning_rate * bias_grad
65
+ end
66
+ self
67
+ end
68
+
69
+ # Predict the class (0 or 1) for the given data array.
70
+ def eval(data)
71
+ raise 'Model not trained' unless @weights
72
+
73
+ expected_size = @weights.length - 1
74
+ if data.length != expected_size
75
+ raise ArgumentError,
76
+ "Wrong number of inputs. Expected: #{expected_size}, " \
77
+ "received: #{data.length}."
78
+ end
79
+
80
+ z = data.each_with_index.inject(@weights.last) do |s, (v, j)|
81
+ s + (v.to_f * @weights[j])
82
+ end
83
+ prob = 1.0 / (1.0 + Math.exp(-z))
84
+ prob >= 0.5 ? 1 : 0
85
+ end
86
+
87
+ # Logistic Regression classifiers cannot generate human readable rules.
88
+ #
89
+ # This method returns a string explaining that rule extraction is not
90
+ # supported for this algorithm.
91
+ def get_rules
92
+ 'LogisticRegression does not support rule extraction.'
93
+ end
94
+ end
95
+ end
96
+ end
@@ -1,104 +1,118 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens (Implementation only)
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
- require File.dirname(__FILE__) + '/../data/data_set.rb'
11
- require File.dirname(__FILE__) + '/../classifiers/classifier'
12
- require File.dirname(__FILE__) + '/../neural_network/backpropagation'
12
+ require_relative '../data/data_set'
13
+ require_relative '../classifiers/classifier'
14
+ require_relative '../neural_network/backpropagation'
13
15
 
14
16
  module Ai4r
15
17
  module Classifiers
16
-
17
18
  # = Introduction
18
- #
19
- # The idea behind the MultilayerPerceptron classifier is to
20
- # train a Multilayer Perceptron neural network with the provided examples,
19
+ #
20
+ # The idea behind the MultilayerPerceptron classifier is to
21
+ # train a Multilayer Perceptron neural network with the provided examples,
21
22
  # and predict the class for new data items.
22
- #
23
+ #
23
24
  # = Parameters
24
- #
25
+ #
25
26
  # Use class method get_parameters_info to obtain details on the algorithm
26
27
  # parameters. Use set_parameters to set values for these parameters.
27
28
  # See Parameterizable module documentation.
28
- #
29
- # * :network_class => Neural network implementation class.
29
+ #
30
+ # * :network_class => Neural network implementation class.
30
31
  # By default: Ai4r::NeuralNetwork::Backpropagation.
31
32
  # * :network_parameters => Parameters to be forwarded to the back end
32
- # neural ntework.
33
- # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
33
+ # neural network.
34
+ # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
34
35
  # 2 hidden layers with 8 and 6 neurons each. By default []
35
- # * :training_iterations => How many times the training should be repeated.
36
- # By default: 1000.
37
- # :active_node_value => Default: 1
36
+ # * :training_iterations => How many times the training should be repeated.
37
+ # By default: 500.
38
+ # :active_node_value => Default: 1
38
39
  # :inactive_node_value => Default: 0
39
40
  class MultilayerPerceptron < Classifier
40
-
41
41
  attr_reader :data_set, :class_value, :network, :domains
42
-
43
- parameters_info :network_class => "Neural network implementation class."+
44
- "By default: Ai4r::NeuralNetwork::Backpropagation.",
45
- :network_parameters => "parameters to be forwarded to the back end " +
46
- "neural network.",
47
- :hidden_layers => "Hidden layer structure. E.g. [8, 6] will generate " +
48
- "2 hidden layers with 8 and 6 neurons each. By default []",
49
- :training_iterations => "How many times the training should be " +
50
- "repeated. By default: 500",
51
- :active_node_value => "Default: 1",
52
- :inactive_node_value => "Default: 0"
53
-
42
+
43
+ TRAINING_ITERATIONS = 500
44
+
45
+ parameters_info network_class: 'Neural network implementation class.' \
46
+ 'By default: Ai4r::NeuralNetwork::Backpropagation.',
47
+ network_parameters: 'parameters to be forwarded to the back end ' \
48
+ 'neural network.',
49
+ hidden_layers: 'Hidden layer structure. E.g. [8, 6] will generate ' \
50
+ '2 hidden layers with 8 and 6 neurons each. By default []',
51
+ training_iterations: 'How many times the training should be ' \
52
+ "repeated. By default: #{TRAINING_ITERATIONS}",
53
+ active_node_value: 'Default: 1',
54
+ inactive_node_value: 'Default: 0'
55
+
56
+ # @return [Object]
54
57
  def initialize
58
+ super()
55
59
  @network_class = Ai4r::NeuralNetwork::Backpropagation
56
60
  @hidden_layers = []
57
- @training_iterations = 500
61
+ @training_iterations = TRAINING_ITERATIONS
58
62
  @network_parameters = {}
59
63
  @active_node_value = 1
60
64
  @inactive_node_value = 0
61
65
  end
62
-
63
- # Build a new MultilayerPerceptron classifier. You must provide a DataSet
64
- # instance as parameter. The last attribute of each item is considered as
66
+
67
+ # Build a new MultilayerPerceptron classifier. You must provide a DataSet
68
+ # instance as parameter. The last attribute of each item is considered as
65
69
  # the item class.
70
+ # @param data_set [Object]
71
+ # @return [Object]
66
72
  def build(data_set)
67
73
  data_set.check_not_empty
68
74
  @data_set = data_set
69
- @domains = @data_set.build_domains.collect {|domain| domain.to_a}
75
+ @domains = @data_set.build_domains.collect(&:to_a)
70
76
  @outputs = @domains.last.length
71
77
  @inputs = 0
72
- @domains[0...-1].each {|domain| @inputs += domain.length}
78
+ @domains[0...-1].each { |domain| @inputs += domain.length }
73
79
  @structure = [@inputs] + @hidden_layers + [@outputs]
74
80
  @network = @network_class.new @structure
75
- @training_iterations.times do
76
- data_set.data_items.each do |data_item|
77
- input_values = data_to_input(data_item[0...-1])
78
- output_values = data_to_output(data_item.last)
79
- @network.train(input_values, output_values)
80
- end
81
+ inputs = []
82
+ outputs = []
83
+ data_set.data_items.each do |data_item|
84
+ inputs << data_to_input(data_item[0...-1])
85
+ outputs << data_to_output(data_item.last)
81
86
  end
82
- return self
87
+ @network.train_epochs(inputs, outputs,
88
+ epochs: @training_iterations, batch_size: 1)
89
+ self
83
90
  end
84
-
91
+ # rubocop:enable Metrics/AbcSize
92
+
85
93
  # You can evaluate new data, predicting its class.
86
94
  # e.g.
87
95
  # classifier.eval(['New York', '<30', 'F']) # => 'Y'
96
+ # @param data [Object]
97
+ # @return [Object]
88
98
  def eval(data)
89
99
  input_values = data_to_input(data)
90
100
  output_values = @network.eval(input_values)
91
- return @domains.last[get_max_index(output_values)]
101
+ @domains.last[get_max_index(output_values)]
92
102
  end
93
-
94
- # Multilayer Perceptron Classifiers cannot generate
103
+
104
+ # Multilayer Perceptron Classifiers cannot generate
95
105
  # human-readable rules.
106
+ # @return [Object]
96
107
  def get_rules
97
- return "raise 'Neural networks classifiers do not generate human-readable rules.'"
108
+ "raise 'Neural networks classifiers do not generate human-readable rules.'"
98
109
  end
110
+ # rubocop:enable Naming/AccessorMethodName
99
111
 
100
112
  protected
101
-
113
+
114
+ # @param data_item [Object]
115
+ # @return [Object]
102
116
  def data_to_input(data_item)
103
117
  input_values = Array.new(@inputs, @inactive_node_value)
104
118
  accum_index = 0
@@ -106,17 +120,21 @@ module Ai4r
106
120
  att_value = data_item[att_index]
107
121
  domain_index = @domains[att_index].index(att_value)
108
122
  input_values[domain_index + accum_index] = @active_node_value
109
- accum_index = @domains[att_index].length
123
+ accum_index += @domains[att_index].length
110
124
  end
111
- return input_values
125
+ input_values
112
126
  end
113
-
127
+
128
+ # @param data_item [Object]
129
+ # @return [Object]
114
130
  def data_to_output(data_item)
115
131
  output_values = Array.new(@outputs, @inactive_node_value)
116
132
  output_values[@domains.last.index(data_item)] = @active_node_value
117
- return output_values
133
+ output_values
118
134
  end
119
-
135
+
136
+ # @param output_values [Object]
137
+ # @return [Object]
120
138
  def get_max_index(output_values)
121
139
  max_value = @inactive_node_value
122
140
  max_index = 0
@@ -126,10 +144,8 @@ module Ai4r
126
144
  max_index = output_index
127
145
  end
128
146
  end
129
- return max_index
147
+ max_index
130
148
  end
131
-
132
149
  end
133
-
134
150
  end
135
151
  end
@@ -1,19 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Thomas Kern
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
8
  # You can redistribute it and/or modify it under the terms of
7
9
  # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
- require File.dirname(__FILE__) + '/../data/data_set'
11
- require File.dirname(__FILE__) + '/classifier'
12
+ require_relative '../data/data_set'
13
+ require_relative 'classifier'
12
14
 
13
15
  module Ai4r
14
16
  module Classifiers
15
-
16
-
17
17
  # = Introduction
18
18
  #
19
19
  # This is an implementation of a Naive Bayesian Classifier without any
@@ -21,7 +21,7 @@ module Ai4r
21
21
  # Probabilities P(a_i | v_j) are estimated using m-estimates, hence the
22
22
  # m parameter as second parameter when instantiating the class.
23
23
  # The estimation looks like this:
24
- #(n_c + mp) / (n + m)
24
+ # (n_c + mp) / (n + m)
25
25
  #
26
26
  # the variables are:
27
27
  # n = the number of training examples for which v = v_j
@@ -54,14 +54,21 @@ module Ai4r
54
54
  # build data
55
55
  # b.eval(["Red", "SUV", "Domestic"])
56
56
  #
57
-
57
+
58
+ # Probabilistic classifier based on Bayes' theorem.
58
59
  class NaiveBayes < Classifier
60
+ attr_reader :class_prob, :pcc, :pcp
59
61
 
60
- parameters_info :m => 'Default value is set to 0. It may be set to a value greater than ' +
61
- '0 when the size of the dataset is relatively small'
62
-
62
+ parameters_info m: 'Default value is set to 0. It may be set to a value greater than ' \
63
+ '0 when the size of the dataset is relatively small',
64
+ unknown_value_strategy: 'Behaviour when evaluating unseen attribute values: ' \
65
+ ':ignore (default), :uniform or :error.'
66
+
67
+ # @return [Object]
63
68
  def initialize
69
+ super()
64
70
  @m = 0
71
+ @unknown_value_strategy = :ignore
65
72
  @class_counts = []
66
73
  @class_prob = [] # stores the probability of the classes
67
74
  @pcc = [] # stores the number of instances divided into attribute/value/class
@@ -69,11 +76,13 @@ module Ai4r
69
76
  @klass_index = {} # hashmap for quick lookup of all the used klasses and their indice
70
77
  @values = {} # hashmap for quick lookup of all the values
71
78
  end
72
-
79
+
73
80
  # You can evaluate new data, predicting its category.
74
81
  # e.g.
75
82
  # b.eval(["Red", "SUV", "Domestic"])
76
83
  # => 'No'
84
+ # @param data [Object]
85
+ # @return [Object]
77
86
  def eval(data)
78
87
  prob = @class_prob.dup
79
88
  prob = calculate_class_probabilities_for_entry(data, prob)
@@ -82,13 +91,15 @@ module Ai4r
82
91
 
83
92
  # Calculates the probabilities for the data entry Data.
84
93
  # data has to be an array of the same dimension as the training data minus the
85
- # class column.
94
+ # class column.
86
95
  # Returns a map containing all classes as keys:
87
96
  # {Class_1 => probability, Class_2 => probability2 ... }
88
97
  # Probability is <= 1 and of type Float.
89
98
  # e.g.
90
99
  # b.get_probability_map(["Red", "SUV", "Domestic"])
91
100
  # => {"Yes"=>0.4166666666666667, "No"=>0.5833333333333334}
101
+ # @param data [Object]
102
+ # @return [Object]
92
103
  def get_probability_map(data)
93
104
  prob = @class_prob.dup
94
105
  prob = calculate_class_probabilities_for_entry(data, prob)
@@ -102,9 +113,11 @@ module Ai4r
102
113
  # counts values of the attribute instances and calculates the probability of the classes
103
114
  # and the conditional probabilities
104
115
  # Parameter data has to be an instance of Ai4r::Data::DataSet
116
+ # @param data [Object]
117
+ # @return [Object]
105
118
  def build(data)
106
119
  raise 'Error instance must be passed' unless data.is_a?(Ai4r::Data::DataSet)
107
- raise 'Data should not be empty' if data.data_items.length == 0
120
+ raise 'Data should not be empty' if data.data_items.empty?
108
121
 
109
122
  initialize_domain_data(data)
110
123
  initialize_klass_index
@@ -114,50 +127,86 @@ module Ai4r
114
127
  self
115
128
  end
116
129
 
130
+ # Naive Bayes classifiers cannot generate human readable rules.
131
+ # This method returns a descriptive string explaining that rule
132
+ # extraction is not supported for this algorithm.
133
+ def get_rules
134
+ 'NaiveBayes does not support rule extraction.'
135
+ end
136
+
117
137
  private
118
138
 
139
+ # @param data [Object]
140
+ # @return [Object]
119
141
  def initialize_domain_data(data)
120
142
  @domains = data.build_domains
121
143
  @data_items = data.data_items.map { |item| DataEntry.new(item[0...-1], item.last) }
122
144
  @data_labels = data.data_labels[0...-1]
123
- @klasses = @domains.last.to_a
145
+ @klasses = @domains.last.to_a.sort
124
146
  end
125
147
 
126
-
127
148
  # calculates the klass probability of a data entry
128
149
  # as usual, the probability of the value is multiplied with every conditional
129
150
  # probability of every attribute in condition to a specific class
130
151
  # this is repeated for every class
152
+ # @param data [Object]
153
+ # @param prob [Object]
154
+ # @return [Object]
131
155
  def calculate_class_probabilities_for_entry(data, prob)
132
156
  0.upto(prob.length - 1) do |prob_index|
133
157
  data.each_with_index do |att, index|
134
- next if value_index(att, index).nil?
135
- prob[prob_index] *= @pcp[index][value_index(att, index)][prob_index]
158
+ val_index = value_index(att, index)
159
+ if val_index.nil?
160
+ case @unknown_value_strategy
161
+ when :ignore
162
+ next
163
+ when :uniform
164
+ value_count = @pcc[index].count { |arr| arr[prob_index].positive? }
165
+ value_count = 1 if value_count.zero?
166
+ prob[prob_index] *= 1.0 / value_count
167
+ when :error
168
+ raise "Unknown value '#{att}' for attribute #{@data_labels[index]}"
169
+ else
170
+ next
171
+ end
172
+ else
173
+ prob[prob_index] *= @pcp[index][val_index][prob_index]
174
+ end
136
175
  end
176
+ # rubocop:enable Metrics/ClassLength
137
177
  end
138
-
178
+
139
179
  prob
140
180
  end
141
181
 
142
182
  # normalises the array of probabilities so the sum of the array equals 1
183
+ # @param prob [Object]
184
+ # @return [Object]
143
185
  def normalize_class_probability(prob)
144
186
  prob_sum = sum(prob)
145
- prob_sum > 0 ?
146
- prob.map { |prob_entry| prob_entry / prob_sum } :
187
+ if prob_sum.positive?
188
+ prob.map { |prob_entry| prob_entry / prob_sum }
189
+ else
147
190
  prob
191
+ end
148
192
  end
149
193
 
150
194
  # sums an array up; returns a number of type Float
195
+ # @param array [Object]
196
+ # @return [Object]
151
197
  def sum(array)
152
- array.inject(0.0) { |b, i| b + i }
198
+ array.sum(0.0)
153
199
  end
154
200
 
155
201
  # returns the name of the class when the index is found
202
+ # @param index [Object]
203
+ # @return [Object]
156
204
  def index_to_klass(index)
157
- @klass_index.has_value?(index) ? @klass_index.key(index) : nil
205
+ @klass_index.value?(index) ? @klass_index.key(index) : nil
158
206
  end
159
207
 
160
208
  # initializes @values and @klass_index; maps a certain value to a unique index
209
+ # @return [Object]
161
210
  def initialize_klass_index
162
211
  @klasses.each_with_index do |dl, index|
163
212
  @klass_index[dl] = index
@@ -165,24 +214,31 @@ module Ai4r
165
214
 
166
215
  0.upto(@data_labels.length - 1) do |index|
167
216
  @values[index] = {}
168
- @domains[index].each_with_index do |d, d_index|
217
+ @domains[index].to_a.sort.each_with_index do |d, d_index|
169
218
  @values[index][d] = d_index
170
219
  end
171
220
  end
172
221
  end
173
222
 
174
223
  # returns the index of a class
224
+ # @param klass [Object]
225
+ # @return [Object]
175
226
  def klass_index(klass)
176
227
  @klass_index[klass]
177
228
  end
178
229
 
179
230
  # returns the index of a value, depending on the attribute index
231
+ # @param value [Object]
232
+ # @param dl_index [Object]
233
+ # @return [Object]
180
234
  def value_index(value, dl_index)
181
235
  @values[dl_index][value]
182
236
  end
183
237
 
184
238
  # builds an array of the form:
185
239
  # array[attributes][values][classes]
240
+ # @param index [Object]
241
+ # @return [Object]
186
242
  def build_array(index)
187
243
  domains = Array.new(@domains[index].length)
188
244
  domains.map do
@@ -192,6 +248,7 @@ module Ai4r
192
248
 
193
249
  # initializes the two arrays for storing the count and conditional probabilities of
194
250
  # the attributes
251
+ # @return [Object]
195
252
  def initialize_pc
196
253
  0.upto(@data_labels.length - 1) do |index|
197
254
  @pcc << build_array(index)
@@ -202,6 +259,7 @@ module Ai4r
202
259
  # calculates the occurrences of a class and the instances of a certain value of a
203
260
  # certain attribute and the assigned class.
204
261
  # In addition to that, it also calculates the conditional probabilities and values
262
+ # @return [Object]
205
263
  def calculate_probabilities
206
264
  @klasses.each { |dl| @class_counts[klass_index(dl)] = 0 }
207
265
 
@@ -210,6 +268,7 @@ module Ai4r
210
268
  calculate_conditional_probabilities
211
269
  end
212
270
 
271
+ # @return [Object]
213
272
  def calculate_class_probabilities
214
273
  @data_items.each do |entry|
215
274
  @class_counts[klass_index(entry.klass)] += 1
@@ -221,6 +280,7 @@ module Ai4r
221
280
  end
222
281
 
223
282
  # counts the instances of a certain value of a certain attribute and the assigned class
283
+ # @return [Object]
224
284
  def count_instances
225
285
  @data_items.each do |item|
226
286
  0.upto(@data_labels.length - 1) do |dl_index|
@@ -230,39 +290,40 @@ module Ai4r
230
290
  end
231
291
 
232
292
  # calculates the conditional probability and stores it in the @pcp-array
293
+ # @return [Object]
233
294
  def calculate_conditional_probabilities
234
295
  @pcc.each_with_index do |attributes, a_index|
235
296
  attributes.each_with_index do |values, v_index|
236
297
  values.each_with_index do |klass, k_index|
237
- @pcp[a_index][v_index][k_index] = (klass.to_f + @m * @class_prob[k_index]) / (@class_counts[k_index] + @m)
298
+ @pcp[a_index][v_index][k_index] =
299
+ (klass.to_f + (@m * @class_prob[k_index])) / (@class_counts[k_index] + @m)
238
300
  end
239
301
  end
240
302
  end
241
303
  end
242
304
 
243
- #DataEntry stores the instance of the data entry
244
- #the data is accessible via entries
245
- #stores the class-column in the attribute klass and
246
- #removes the column for the class-entry
305
+ # DataEntry stores the instance of the data entry
306
+ # the data is accessible via entries
307
+ # stores the class-column in the attribute klass and
308
+ # removes the column for the class-entry
247
309
  class DataEntry
248
310
  attr_accessor :klass, :entries
249
311
 
312
+ # @param attributes [Object]
313
+ # @param klass [Object]
314
+ # @return [Object]
250
315
  def initialize(attributes, klass)
251
316
  @klass = klass
252
317
  @entries = attributes
253
318
  end
254
319
 
255
320
  # wrapper method for the access to @entries
321
+ # @param index [Object]
322
+ # @return [Object]
256
323
  def [](index)
257
324
  @entries[index]
258
325
  end
259
326
  end
260
-
261
327
  end
262
328
  end
263
329
  end
264
-
265
- # Monkeypatch to support both ruby 1.8 and 1.9 (key vs index method)
266
- class Hash
267
- alias_method(:key, :index) unless method_defined?(:key)
268
- end