ai4r 1.12 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.csv +159 -0
  16. data/examples/classifiers/simple_linear_regression_example.rb +18 -0
  17. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  18. data/examples/classifiers/zero_one_r_data.csv +8 -0
  19. data/examples/clusterers/clusterer_example.rb +62 -0
  20. data/examples/clusterers/dbscan_example.rb +17 -0
  21. data/examples/clusterers/dendrogram_example.rb +17 -0
  22. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  23. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  24. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  25. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  26. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  27. data/examples/neural_network/backpropagation_example.rb +49 -48
  28. data/examples/neural_network/hopfield_example.rb +45 -0
  29. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  30. data/examples/neural_network/patterns_with_noise.rb +41 -39
  31. data/examples/neural_network/train_epochs_callback.rb +25 -0
  32. data/examples/neural_network/training_patterns.rb +39 -39
  33. data/examples/neural_network/transformer_text_classification.rb +78 -0
  34. data/examples/neural_network/xor_example.rb +23 -22
  35. data/examples/reinforcement/q_learning_example.rb +10 -0
  36. data/examples/som/som_data.rb +155 -152
  37. data/examples/som/som_multi_node_example.rb +12 -13
  38. data/examples/som/som_single_example.rb +12 -15
  39. data/examples/transformer/decode_classifier_example.rb +68 -0
  40. data/examples/transformer/deterministic_example.rb +10 -0
  41. data/examples/transformer/seq2seq_example.rb +16 -0
  42. data/lib/ai4r/classifiers/classifier.rb +24 -16
  43. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  44. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  45. data/lib/ai4r/classifiers/ib1.rb +122 -32
  46. data/lib/ai4r/classifiers/id3.rb +527 -144
  47. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  48. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  49. data/lib/ai4r/classifiers/naive_bayes.rb +112 -48
  50. data/lib/ai4r/classifiers/one_r.rb +112 -44
  51. data/lib/ai4r/classifiers/prism.rb +167 -76
  52. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  53. data/lib/ai4r/classifiers/simple_linear_regression.rb +143 -0
  54. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  55. data/lib/ai4r/classifiers/votes.rb +57 -0
  56. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  57. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  58. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  59. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  60. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  61. data/lib/ai4r/clusterers/clusterer.rb +28 -24
  62. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  63. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  64. data/lib/ai4r/clusterers/diana.rb +75 -49
  65. data/lib/ai4r/clusterers/k_means.rb +309 -72
  66. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  67. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  68. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  69. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +63 -0
  70. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  71. data/lib/ai4r/data/data_set.rb +229 -100
  72. data/lib/ai4r/data/parameterizable.rb +31 -25
  73. data/lib/ai4r/data/proximity.rb +72 -50
  74. data/lib/ai4r/data/statistics.rb +46 -35
  75. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  76. data/lib/ai4r/experiment/split.rb +39 -0
  77. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  78. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  79. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  80. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  81. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  82. data/lib/ai4r/neural_network/backpropagation.rb +419 -143
  83. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  84. data/lib/ai4r/neural_network/transformer.rb +194 -0
  85. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  86. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  87. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  88. data/lib/ai4r/search/a_star.rb +76 -0
  89. data/lib/ai4r/search/bfs.rb +50 -0
  90. data/lib/ai4r/search/dfs.rb +50 -0
  91. data/lib/ai4r/search/mcts.rb +118 -0
  92. data/lib/ai4r/search.rb +12 -0
  93. data/lib/ai4r/som/distance_metrics.rb +29 -0
  94. data/lib/ai4r/som/layer.rb +28 -17
  95. data/lib/ai4r/som/node.rb +61 -32
  96. data/lib/ai4r/som/som.rb +158 -41
  97. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  98. data/lib/ai4r/version.rb +3 -0
  99. data/lib/ai4r.rb +58 -27
  100. metadata +117 -106
  101. data/README.rdoc +0 -44
  102. data/test/classifiers/hyperpipes_test.rb +0 -84
  103. data/test/classifiers/ib1_test.rb +0 -78
  104. data/test/classifiers/id3_test.rb +0 -208
  105. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  106. data/test/classifiers/naive_bayes_test.rb +0 -43
  107. data/test/classifiers/one_r_test.rb +0 -62
  108. data/test/classifiers/prism_test.rb +0 -85
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -100
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_test.rb +0 -53
  119. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  120. data/test/data/data_set_test.rb +0 -96
  121. data/test/data/proximity_test.rb +0 -81
  122. data/test/data/statistics_test.rb +0 -65
  123. data/test/experiment/classifier_evaluator_test.rb +0 -76
  124. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  125. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  126. data/test/neural_network/backpropagation_test.rb +0 -82
  127. data/test/neural_network/hopfield_test.rb +0 -72
  128. data/test/som/som_test.rb +0 -97
@@ -0,0 +1,96 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Author:: OpenAI Assistant
4
+ # License:: MPL 1.1
5
+ # Project:: ai4r
6
+ # Url:: https://github.com/SergioFierens/ai4r
7
+ #
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
10
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
11
+
12
+ require_relative '../data/data_set'
13
+ require_relative 'classifier'
14
+
15
+ module Ai4r
16
+ module Classifiers
17
# Binary logistic regression classifier trained with batch gradient descent.
#
# Training data must have numeric attributes with the last attribute being
# the class label (0 or 1). Every value is coerced with +to_f+ before
# training. Parameters can be adjusted with
# {Parameterizable#set_parameters}.
#
# Example:
#   data = Ai4r::Data::DataSet.new(:data_items => [[0.2, 1], [0.4, 0]])
#   classifier = LogisticRegression.new.build(data)
#   classifier.eval([0.3])
class LogisticRegression < Classifier
  # Learned coefficients: one weight per attribute followed by the bias
  # term. +nil+ until #build has been called.
  attr_reader :weights

  parameters_info learning_rate: 'Learning rate for gradient descent.',
                  iterations: 'Number of iterations to train.'

  # Sets the default hyper-parameters (learning rate 0.1, 1000 iterations)
  # and leaves the model untrained.
  def initialize
    super()
    @learning_rate = 0.1
    @iterations = 1000
    @weights = nil
  end

  # Train the logistic regression classifier using the provided dataset.
  #
  # @param data_set [Ai4r::Data::DataSet] items made of attribute values
  #   followed by the class label
  # @return [LogisticRegression] self, so calls can be chained
  # @raise [RuntimeError] if +data_set+ is not a DataSet
  # @raise [ArgumentError] if the data items do not all have the same length
  def build(data_set)
    raise 'Error instance must be passed' unless data_set.is_a?(Ai4r::Data::DataSet)

    data_set.check_not_empty

    items = data_set.data_items
    # A ragged row would either fail with an obscure nil TypeError or,
    # when exactly one value longer, be multiplied by the bias weight.
    width = items.first.length
    unless items.all? { |item| item.length == width }
      raise ArgumentError, 'All data items must have the same number of values.'
    end

    x = items.map { |item| item[0...-1].map(&:to_f) }
    y = items.map { |item| item.last.to_f }
    m = x.length
    n = x.first.length
    @weights = Array.new(n + 1, 0.0) # last value is bias

    @iterations.times do
      # All errors are computed before any weight changes, so every
      # coefficient is updated from the same set of predictions.
      errors = x.zip(y).map { |row, label| probability(row) - label }

      n.times do |j|
        grad = errors.zip(x).inject(0.0) { |acc, (err, row)| acc + (err * row[j]) } / m
        @weights[j] -= @learning_rate * grad
      end
      @weights[n] -= @learning_rate * (errors.sum / m)
    end
    self
  end

  # Predict the class (0 or 1) for the given data array.
  #
  # @param data [Array] attribute values, without the class label
  # @return [Integer] 1 when the estimated probability is >= 0.5, else 0
  # @raise [RuntimeError] if the model has not been built yet
  # @raise [ArgumentError] if +data+ does not hold one value per attribute
  def eval(data)
    raise 'Model not trained' unless @weights

    expected_size = @weights.length - 1
    if data.length != expected_size
      raise ArgumentError,
            "Wrong number of inputs. Expected: #{expected_size}, " \
            "received: #{data.length}."
    end

    probability(data) >= 0.5 ? 1 : 0
  end

  # Logistic Regression classifiers cannot generate human readable rules.
  #
  # This method returns a string explaining that rule extraction is not
  # supported for this algorithm.
  def get_rules
    'LogisticRegression does not support rule extraction.'
  end

  private

  # Sigmoid of the bias plus the weighted sum of +values+, using the
  # current @weights. Shared by #build and #eval.
  def probability(values)
    z = values.each_with_index.inject(@weights.last) do |acc, (value, j)|
      acc + (value.to_f * @weights[j])
    end
    1.0 / (1.0 + Math.exp(-z))
  end
end
95
+ end
96
+ end
@@ -1,104 +1,118 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens (Implementation only)
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
- require File.dirname(__FILE__) + '/../data/data_set.rb'
11
- require File.dirname(__FILE__) + '/../classifiers/classifier'
12
- require File.dirname(__FILE__) + '/../neural_network/backpropagation'
12
+ require_relative '../data/data_set'
13
+ require_relative '../classifiers/classifier'
14
+ require_relative '../neural_network/backpropagation'
13
15
 
14
16
  module Ai4r
15
17
  module Classifiers
16
-
17
18
  # = Introduction
18
- #
19
- # The idea behind the MultilayerPerceptron classifier is to
20
- # train a Multilayer Perceptron neural network with the provided examples,
19
+ #
20
+ # The idea behind the MultilayerPerceptron classifier is to
21
+ # train a Multilayer Perceptron neural network with the provided examples,
21
22
  # and predict the class for new data items.
22
- #
23
+ #
23
24
  # = Parameters
24
- #
25
+ #
25
26
  # Use class method get_parameters_info to obtain details on the algorithm
26
27
  # parameters. Use set_parameters to set values for these parameters.
27
28
  # See Parameterizable module documentation.
28
- #
29
- # * :network_class => Neural network implementation class.
29
+ #
30
+ # * :network_class => Neural network implementation class.
30
31
  # By default: Ai4r::NeuralNetwork::Backpropagation.
31
32
  # * :network_parameters => Parameters to be forwarded to the back end
32
- # neural ntework.
33
- # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
33
+ # neural network.
34
+ # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
34
35
  # 2 hidden layers with 8 and 6 neurons each. By default []
35
- # * :training_iterations => How many times the training should be repeated.
36
- # By default: 1000.
37
- # :active_node_value => Default: 1
36
+ # * :training_iterations => How many times the training should be repeated.
37
+ # By default: 500.
38
+ # :active_node_value => Default: 1
38
39
  # :inactive_node_value => Default: 0
39
40
  class MultilayerPerceptron < Classifier
40
-
41
41
  attr_reader :data_set, :class_value, :network, :domains
42
-
43
- parameters_info :network_class => "Neural network implementation class."+
44
- "By default: Ai4r::NeuralNetwork::Backpropagation.",
45
- :network_parameters => "parameters to be forwarded to the back end " +
46
- "neural network.",
47
- :hidden_layers => "Hidden layer structure. E.g. [8, 6] will generate " +
48
- "2 hidden layers with 8 and 6 neurons each. By default []",
49
- :training_iterations => "How many times the training should be " +
50
- "repeated. By default: 1000",
51
- :active_node_value => "Default: 1",
52
- :inactive_node_value => "Default: 0"
53
-
42
+
43
+ TRAINING_ITERATIONS = 500
44
+
45
+ parameters_info network_class: 'Neural network implementation class.' \
46
+ 'By default: Ai4r::NeuralNetwork::Backpropagation.',
47
+ network_parameters: 'parameters to be forwarded to the back end ' \
48
+ 'neural network.',
49
+ hidden_layers: 'Hidden layer structure. E.g. [8, 6] will generate ' \
50
+ '2 hidden layers with 8 and 6 neurons each. By default []',
51
+ training_iterations: 'How many times the training should be ' \
52
+ "repeated. By default: #{TRAINING_ITERATIONS}",
53
+ active_node_value: 'Default: 1',
54
+ inactive_node_value: 'Default: 0'
55
+
56
+ # @return [Object]
54
57
  def initialize
58
+ super()
55
59
  @network_class = Ai4r::NeuralNetwork::Backpropagation
56
60
  @hidden_layers = []
57
- @training_iterations = 500
61
+ @training_iterations = TRAINING_ITERATIONS
58
62
  @network_parameters = {}
59
63
  @active_node_value = 1
60
64
  @inactive_node_value = 0
61
65
  end
62
-
63
- # Build a new MultilayerPerceptron classifier. You must provide a DataSet
64
- # instance as parameter. The last attribute of each item is considered as
66
+
67
+ # Build a new MultilayerPerceptron classifier. You must provide a DataSet
68
+ # instance as parameter. The last attribute of each item is considered as
65
69
  # the item class.
70
+ # @param data_set [Object]
71
+ # @return [Object]
66
72
  def build(data_set)
67
73
  data_set.check_not_empty
68
74
  @data_set = data_set
69
- @domains = @data_set.build_domains.collect {|domain| domain.to_a}
75
+ @domains = @data_set.build_domains.collect(&:to_a)
70
76
  @outputs = @domains.last.length
71
77
  @inputs = 0
72
- @domains[0...-1].each {|domain| @inputs += domain.length}
78
+ @domains[0...-1].each { |domain| @inputs += domain.length }
73
79
  @structure = [@inputs] + @hidden_layers + [@outputs]
74
80
  @network = @network_class.new @structure
75
- @training_iterations.times do
76
- data_set.data_items.each do |data_item|
77
- input_values = data_to_input(data_item[0...-1])
78
- output_values = data_to_output(data_item.last)
79
- @network.train(input_values, output_values)
80
- end
81
+ inputs = []
82
+ outputs = []
83
+ data_set.data_items.each do |data_item|
84
+ inputs << data_to_input(data_item[0...-1])
85
+ outputs << data_to_output(data_item.last)
81
86
  end
82
- return self
87
+ @network.train_epochs(inputs, outputs,
88
+ epochs: @training_iterations, batch_size: 1)
89
+ self
83
90
  end
84
-
91
+ # rubocop:enable Metrics/AbcSize
92
+
85
93
  # You can evaluate new data, predicting its class.
86
94
  # e.g.
87
95
  # classifier.eval(['New York', '<30', 'F']) # => 'Y'
96
+ # @param data [Object]
97
+ # @return [Object]
88
98
  def eval(data)
89
99
  input_values = data_to_input(data)
90
100
  output_values = @network.eval(input_values)
91
- return @domains.last[get_max_index(output_values)]
101
+ @domains.last[get_max_index(output_values)]
92
102
  end
93
-
94
- # Multilayer Perceptron Classifiers cannot generate
103
+
104
+ # Multilayer Perceptron Classifiers cannot generate
95
105
  # human-readable rules.
106
+ # @return [Object]
96
107
  def get_rules
97
- return "raise 'Neural networks classifiers do not generate human-readable rules.'"
108
+ "raise 'Neural networks classifiers do not generate human-readable rules.'"
98
109
  end
110
+ # rubocop:enable Naming/AccessorMethodName
99
111
 
100
112
  protected
101
-
113
+
114
+ # @param data_item [Object]
115
+ # @return [Object]
102
116
  def data_to_input(data_item)
103
117
  input_values = Array.new(@inputs, @inactive_node_value)
104
118
  accum_index = 0
@@ -106,17 +120,21 @@ module Ai4r
106
120
  att_value = data_item[att_index]
107
121
  domain_index = @domains[att_index].index(att_value)
108
122
  input_values[domain_index + accum_index] = @active_node_value
109
- accum_index = @domains[att_index].length
123
+ accum_index += @domains[att_index].length
110
124
  end
111
- return input_values
125
+ input_values
112
126
  end
113
-
127
+
128
+ # @param data_item [Object]
129
+ # @return [Object]
114
130
  def data_to_output(data_item)
115
131
  output_values = Array.new(@outputs, @inactive_node_value)
116
132
  output_values[@domains.last.index(data_item)] = @active_node_value
117
- return output_values
133
+ output_values
118
134
  end
119
-
135
+
136
+ # @param output_values [Object]
137
+ # @return [Object]
120
138
  def get_max_index(output_values)
121
139
  max_value = @inactive_node_value
122
140
  max_index = 0
@@ -126,10 +144,8 @@ module Ai4r
126
144
  max_index = output_index
127
145
  end
128
146
  end
129
- return max_index
147
+ max_index
130
148
  end
131
-
132
149
  end
133
-
134
150
  end
135
151
  end
@@ -1,19 +1,19 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Thomas Kern
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
- # Url:: http://ai4r.org/
6
+ # Url:: https://github.com/SergioFierens/ai4r
5
7
  #
6
8
  # You can redistribute it and/or modify it under the terms of
7
9
  # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
- require File.dirname(__FILE__) + '/../data/data_set'
11
- require File.dirname(__FILE__) + '/classifier'
12
+ require_relative '../data/data_set'
13
+ require_relative 'classifier'
12
14
 
13
15
  module Ai4r
14
16
  module Classifiers
15
-
16
-
17
17
  # = Introduction
18
18
  #
19
19
  # This is an implementation of a Naive Bayesian Classifier without any
@@ -21,7 +21,7 @@ module Ai4r
21
21
  # Probabilities P(a_i | v_j) are estimated using m-estimates, hence the
22
22
  # m parameter as second parameter when instantiating the class.
23
23
  # The estimation looks like this:
24
- #(n_c + mp) / (n + m)
24
+ # (n_c + mp) / (n + m)
25
25
  #
26
26
  # the variables are:
27
27
  # n = the number of training examples for which v = v_j
@@ -54,14 +54,21 @@ module Ai4r
54
54
  # build data
55
55
  # b.eval(["Red", "SUV", "Domestic"])
56
56
  #
57
-
57
+
58
+ # Probabilistic classifier based on Bayes' theorem.
58
59
  class NaiveBayes < Classifier
60
+ attr_reader :class_prob, :pcc, :pcp
59
61
 
60
- parameters_info :m => "Default value is set to 0. It may be set to a value greater than " +
61
- "0 when the size of the dataset is relatively small"
62
-
62
+ parameters_info m: 'Default value is set to 0. It may be set to a value greater than ' \
63
+ '0 when the size of the dataset is relatively small',
64
+ unknown_value_strategy: 'Behaviour when evaluating unseen attribute values: ' \
65
+ ':ignore (default), :uniform or :error.'
66
+
67
+ # @return [Object]
63
68
  def initialize
69
+ super()
64
70
  @m = 0
71
+ @unknown_value_strategy = :ignore
65
72
  @class_counts = []
66
73
  @class_prob = [] # stores the probability of the classes
67
74
  @pcc = [] # stores the number of instances divided into attribute/value/class
@@ -69,144 +76,199 @@ module Ai4r
69
76
  @klass_index = {} # hashmap for quick lookup of all the used klasses and their indice
70
77
  @values = {} # hashmap for quick lookup of all the values
71
78
  end
72
-
79
+
73
80
  # You can evaluate new data, predicting its category.
74
81
  # e.g.
75
82
  # b.eval(["Red", "SUV", "Domestic"])
76
83
  # => 'No'
84
+ # @param data [Object]
85
+ # @return [Object]
77
86
  def eval(data)
78
- prob = @class_prob.map {|cp| cp}
87
+ prob = @class_prob.dup
79
88
  prob = calculate_class_probabilities_for_entry(data, prob)
80
89
  index_to_klass(prob.index(prob.max))
81
90
  end
82
91
 
83
92
  # Calculates the probabilities for the data entry Data.
84
93
  # data has to be an array of the same dimension as the training data minus the
85
- # class column.
94
+ # class column.
86
95
  # Returns a map containing all classes as keys:
87
96
  # {Class_1 => probability, Class_2 => probability2 ... }
88
97
  # Probability is <= 1 and of type Float.
89
98
  # e.g.
90
99
  # b.get_probability_map(["Red", "SUV", "Domestic"])
91
100
  # => {"Yes"=>0.4166666666666667, "No"=>0.5833333333333334}
101
+ # @param data [Object]
102
+ # @return [Object]
92
103
  def get_probability_map(data)
93
- prob = @class_prob.map {|cp| cp}
104
+ prob = @class_prob.dup
94
105
  prob = calculate_class_probabilities_for_entry(data, prob)
95
106
  prob = normalize_class_probability prob
96
107
  probability_map = {}
97
108
  prob.each_with_index { |p, i| probability_map[index_to_klass(i)] = p }
98
- return probability_map
109
+
110
+ probability_map
99
111
  end
100
112
 
101
113
  # counts values of the attribute instances and calculates the probability of the classes
102
114
  # and the conditional probabilities
103
115
  # Parameter data has to be an instance of Ai4r::Data::DataSet
116
+ # @param data [Object]
117
+ # @return [Object]
104
118
  def build(data)
105
- raise "Error instance must be passed" unless data.is_a?(DataSet)
106
- raise "Data should not be empty" if data.data_items.length == 0
119
+ raise 'Error instance must be passed' unless data.is_a?(Ai4r::Data::DataSet)
120
+ raise 'Data should not be empty' if data.data_items.empty?
107
121
 
108
122
  initialize_domain_data(data)
109
123
  initialize_klass_index
110
124
  initialize_pc
111
125
  calculate_probabilities
112
126
 
113
- return self
127
+ self
128
+ end
129
+
130
+ # Naive Bayes classifiers cannot generate human readable rules.
131
+ # This method returns a descriptive string explaining that rule
132
+ # extraction is not supported for this algorithm.
133
+ def get_rules
134
+ 'NaiveBayes does not support rule extraction.'
114
135
  end
115
136
 
116
137
  private
117
138
 
139
+ # @param data [Object]
140
+ # @return [Object]
118
141
  def initialize_domain_data(data)
119
142
  @domains = data.build_domains
120
143
  @data_items = data.data_items.map { |item| DataEntry.new(item[0...-1], item.last) }
121
144
  @data_labels = data.data_labels[0...-1]
122
- @klasses = @domains.last.to_a
145
+ @klasses = @domains.last.to_a.sort
123
146
  end
124
147
 
125
-
126
148
  # calculates the klass probability of a data entry
127
149
  # as usual, the probability of the value is multiplied with every conditional
128
150
  # probability of every attribute in condition to a specific class
129
151
  # this is repeated for every class
152
+ # @param data [Object]
153
+ # @param prob [Object]
154
+ # @return [Object]
130
155
  def calculate_class_probabilities_for_entry(data, prob)
131
- prob.each_with_index do |prob_entry, prob_index|
156
+ 0.upto(prob.length - 1) do |prob_index|
132
157
  data.each_with_index do |att, index|
133
- next if value_index(att, index).nil?
134
- prob[prob_index] *= @pcp[index][value_index(att, index)][prob_index]
158
+ val_index = value_index(att, index)
159
+ if val_index.nil?
160
+ case @unknown_value_strategy
161
+ when :ignore
162
+ next
163
+ when :uniform
164
+ value_count = @pcc[index].count { |arr| arr[prob_index].positive? }
165
+ value_count = 1 if value_count.zero?
166
+ prob[prob_index] *= 1.0 / value_count
167
+ when :error
168
+ raise "Unknown value '#{att}' for attribute #{@data_labels[index]}"
169
+ else
170
+ next
171
+ end
172
+ else
173
+ prob[prob_index] *= @pcp[index][val_index][prob_index]
174
+ end
135
175
  end
176
+ # rubocop:enable Metrics/ClassLength
136
177
  end
178
+
179
+ prob
137
180
  end
138
181
 
139
182
  # normalises the array of probabilities so the sum of the array equals 1
183
+ # @param prob [Object]
184
+ # @return [Object]
140
185
  def normalize_class_probability(prob)
141
186
  prob_sum = sum(prob)
142
- prob_sum > 0 ?
143
- prob.map {|prob_entry| prob_entry / prob_sum } :
187
+ if prob_sum.positive?
188
+ prob.map { |prob_entry| prob_entry / prob_sum }
189
+ else
144
190
  prob
191
+ end
145
192
  end
146
193
 
147
194
  # sums an array up; returns a number of type Float
195
+ # @param array [Object]
196
+ # @return [Object]
148
197
  def sum(array)
149
- array.inject(0.0){|b, i| b+i}
198
+ array.sum(0.0)
150
199
  end
151
200
 
152
201
  # returns the name of the class when the index is found
202
+ # @param index [Object]
203
+ # @return [Object]
153
204
  def index_to_klass(index)
154
- @klass_index.has_value?(index) ? @klass_index.key(index) : nil
205
+ @klass_index.value?(index) ? @klass_index.key(index) : nil
155
206
  end
156
207
 
157
208
  # initializes @values and @klass_index; maps a certain value to a uniq index
209
+ # @return [Object]
158
210
  def initialize_klass_index
159
211
  @klasses.each_with_index do |dl, index|
160
212
  @klass_index[dl] = index
161
213
  end
162
214
 
163
- @data_labels.each_with_index do |dl, index|
215
+ 0.upto(@data_labels.length - 1) do |index|
164
216
  @values[index] = {}
165
- @domains[index].each_with_index do |d, d_index|
217
+ @domains[index].to_a.sort.each_with_index do |d, d_index|
166
218
  @values[index][d] = d_index
167
219
  end
168
220
  end
169
221
  end
170
222
 
171
223
  # returns the index of a class
224
+ # @param klass [Object]
225
+ # @return [Object]
172
226
  def klass_index(klass)
173
227
  @klass_index[klass]
174
228
  end
175
229
 
176
230
  # returns the index of a value, depending on the attribute index
231
+ # @param value [Object]
232
+ # @param dl_index [Object]
233
+ # @return [Object]
177
234
  def value_index(value, dl_index)
178
235
  @values[dl_index][value]
179
236
  end
180
237
 
181
238
  # builds an array of the form:
182
239
  # array[attributes][values][classes]
183
- def build_array(dl, index)
240
+ # @param index [Object]
241
+ # @return [Object]
242
+ def build_array(index)
184
243
  domains = Array.new(@domains[index].length)
185
- domains.map do |p1|
186
- pl = Array.new @klasses.length, 0
244
+ domains.map do
245
+ Array.new @klasses.length, 0
187
246
  end
188
247
  end
189
248
 
190
249
  # initializes the two array for storing the count and conditional probabilities of
191
250
  # the attributes
251
+ # @return [Object]
192
252
  def initialize_pc
193
- @data_labels.each_with_index do |dl, index|
194
- @pcc << build_array(dl, index)
195
- @pcp << build_array(dl, index)
253
+ 0.upto(@data_labels.length - 1) do |index|
254
+ @pcc << build_array(index)
255
+ @pcp << build_array(index)
196
256
  end
197
257
  end
198
258
 
199
259
  # calculates the occurrences of a class and the instances of a certain value of a
200
260
  # certain attribute and the assigned class.
201
261
  # In addition to that, it also calculates the conditional probabilities and values
262
+ # @return [Object]
202
263
  def calculate_probabilities
203
- @klasses.each {|dl| @class_counts[klass_index(dl)] = 0}
264
+ @klasses.each { |dl| @class_counts[klass_index(dl)] = 0 }
204
265
 
205
266
  calculate_class_probabilities
206
267
  count_instances
207
268
  calculate_conditional_probabilities
208
269
  end
209
270
 
271
+ # @return [Object]
210
272
  def calculate_class_probabilities
211
273
  @data_items.each do |entry|
212
274
  @class_counts[klass_index(entry.klass)] += 1
@@ -218,48 +280,50 @@ module Ai4r
218
280
  end
219
281
 
220
282
  # counts the instances of a certain value of a certain attribute and the assigned class
283
+ # @return [Object]
221
284
  def count_instances
222
285
  @data_items.each do |item|
223
- @data_labels.each_with_index do |dl, dl_index|
286
+ 0.upto(@data_labels.length - 1) do |dl_index|
224
287
  @pcc[dl_index][value_index(item[dl_index], dl_index)][klass_index(item.klass)] += 1
225
288
  end
226
289
  end
227
290
  end
228
291
 
229
292
  # calculates the conditional probability and stores it in the @pcp-array
293
+ # @return [Object]
230
294
  def calculate_conditional_probabilities
231
295
  @pcc.each_with_index do |attributes, a_index|
232
296
  attributes.each_with_index do |values, v_index|
233
297
  values.each_with_index do |klass, k_index|
234
- @pcp[a_index][v_index][k_index] = (klass.to_f + @m * @class_prob[k_index]) / (@class_counts[k_index] + @m).to_f
298
+ @pcp[a_index][v_index][k_index] =
299
+ (klass.to_f + (@m * @class_prob[k_index])) / (@class_counts[k_index] + @m)
235
300
  end
236
301
  end
237
302
  end
238
303
  end
239
304
 
240
- #DataEntry stores the instance of the data entry
241
- #the data is accessible via entries
242
- #stores the class-column in the attribute klass and
243
- #removes the column for the class-entry
305
+ # DataEntry stores the instance of the data entry
306
+ # the data is accessible via entries
307
+ # stores the class-column in the attribute klass and
308
+ # removes the column for the class-entry
244
309
  class DataEntry
245
310
  attr_accessor :klass, :entries
246
311
 
312
+ # @param attributes [Object]
313
+ # @param klass [Object]
314
+ # @return [Object]
247
315
  def initialize(attributes, klass)
248
316
  @klass = klass
249
317
  @entries = attributes
250
318
  end
251
319
 
252
320
  # wrapper method for the access to @entries
321
+ # @param index [Object]
322
+ # @return [Object]
253
323
  def [](index)
254
324
  @entries[index]
255
325
  end
256
326
  end
257
-
258
327
  end
259
328
  end
260
329
  end
261
-
262
- # Monkeypatch to support both ruby 1.8 and 1.9 (key vs index method)
263
- class Hash
264
- alias_method(:key, :index) unless method_defined?(:key)
265
- end