RubyGems - ai4r - Versions diffs - 1.12 → 2.0 - Mend

ai4r 1.12 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (128)

checksums.yaml +7 -0
data/README.md +174 -0
data/examples/classifiers/hyperpipes_data.csv +14 -0
data/examples/classifiers/hyperpipes_example.rb +22 -0
data/examples/classifiers/ib1_example.rb +12 -0
data/examples/classifiers/id3_example.rb +15 -10
data/examples/classifiers/id3_graphviz_example.rb +17 -0
data/examples/classifiers/logistic_regression_example.rb +11 -0
data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
data/examples/classifiers/naive_bayes_example.rb +12 -13
data/examples/classifiers/one_r_example.rb +27 -0
data/examples/classifiers/parameter_tutorial.rb +29 -0
data/examples/classifiers/prism_nominal_example.rb +15 -0
data/examples/classifiers/prism_numeric_example.rb +21 -0
data/examples/classifiers/simple_linear_regression_example.csv +159 -0
data/examples/classifiers/simple_linear_regression_example.rb +18 -0
data/examples/classifiers/zero_and_one_r_example.rb +34 -0
data/examples/classifiers/zero_one_r_data.csv +8 -0
data/examples/clusterers/clusterer_example.rb +62 -0
data/examples/clusterers/dbscan_example.rb +17 -0
data/examples/clusterers/dendrogram_example.rb +17 -0
data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
data/examples/clusterers/kmeans_custom_example.rb +26 -0
data/examples/genetic_algorithm/bitstring_example.rb +41 -0
data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
data/examples/neural_network/backpropagation_example.rb +49 -48
data/examples/neural_network/hopfield_example.rb +45 -0
data/examples/neural_network/patterns_with_base_noise.rb +39 -39
data/examples/neural_network/patterns_with_noise.rb +41 -39
data/examples/neural_network/train_epochs_callback.rb +25 -0
data/examples/neural_network/training_patterns.rb +39 -39
data/examples/neural_network/transformer_text_classification.rb +78 -0
data/examples/neural_network/xor_example.rb +23 -22
data/examples/reinforcement/q_learning_example.rb +10 -0
data/examples/som/som_data.rb +155 -152
data/examples/som/som_multi_node_example.rb +12 -13
data/examples/som/som_single_example.rb +12 -15
data/examples/transformer/decode_classifier_example.rb +68 -0
data/examples/transformer/deterministic_example.rb +10 -0
data/examples/transformer/seq2seq_example.rb +16 -0
data/lib/ai4r/classifiers/classifier.rb +24 -16
data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
data/lib/ai4r/classifiers/ib1.rb +122 -32
data/lib/ai4r/classifiers/id3.rb +527 -144
data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
data/lib/ai4r/classifiers/naive_bayes.rb +112 -48
data/lib/ai4r/classifiers/one_r.rb +112 -44
data/lib/ai4r/classifiers/prism.rb +167 -76
data/lib/ai4r/classifiers/random_forest.rb +72 -0
data/lib/ai4r/classifiers/simple_linear_regression.rb +143 -0
data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
data/lib/ai4r/classifiers/votes.rb +57 -0
data/lib/ai4r/classifiers/zero_r.rb +71 -30
data/lib/ai4r/clusterers/average_linkage.rb +46 -27
data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
data/lib/ai4r/clusterers/clusterer.rb +28 -24
data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
data/lib/ai4r/clusterers/dbscan.rb +134 -0
data/lib/ai4r/clusterers/diana.rb +75 -49
data/lib/ai4r/clusterers/k_means.rb +309 -72
data/lib/ai4r/clusterers/median_linkage.rb +49 -33
data/lib/ai4r/clusterers/single_linkage.rb +196 -88
data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +63 -0
data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
data/lib/ai4r/data/data_set.rb +229 -100
data/lib/ai4r/data/parameterizable.rb +31 -25
data/lib/ai4r/data/proximity.rb +72 -50
data/lib/ai4r/data/statistics.rb +46 -35
data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
data/lib/ai4r/experiment/split.rb +39 -0
data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
data/lib/ai4r/neural_network/activation_functions.rb +37 -0
data/lib/ai4r/neural_network/backpropagation.rb +419 -143
data/lib/ai4r/neural_network/hopfield.rb +175 -58
data/lib/ai4r/neural_network/transformer.rb +194 -0
data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
data/lib/ai4r/reinforcement/q_learning.rb +51 -0
data/lib/ai4r/search/a_star.rb +76 -0
data/lib/ai4r/search/bfs.rb +50 -0
data/lib/ai4r/search/dfs.rb +50 -0
data/lib/ai4r/search/mcts.rb +118 -0
data/lib/ai4r/search.rb +12 -0
data/lib/ai4r/som/distance_metrics.rb +29 -0
data/lib/ai4r/som/layer.rb +28 -17
data/lib/ai4r/som/node.rb +61 -32
data/lib/ai4r/som/som.rb +158 -41
data/lib/ai4r/som/two_phase_layer.rb +21 -25
data/lib/ai4r/version.rb +3 -0
data/lib/ai4r.rb +58 -27
metadata +117 -106
data/README.rdoc +0 -44
data/test/classifiers/hyperpipes_test.rb +0 -84
data/test/classifiers/ib1_test.rb +0 -78
data/test/classifiers/id3_test.rb +0 -208
data/test/classifiers/multilayer_perceptron_test.rb +0 -79
data/test/classifiers/naive_bayes_test.rb +0 -43
data/test/classifiers/one_r_test.rb +0 -62
data/test/classifiers/prism_test.rb +0 -85
data/test/classifiers/zero_r_test.rb +0 -50
data/test/clusterers/average_linkage_test.rb +0 -51
data/test/clusterers/bisecting_k_means_test.rb +0 -66
data/test/clusterers/centroid_linkage_test.rb +0 -53
data/test/clusterers/complete_linkage_test.rb +0 -57
data/test/clusterers/diana_test.rb +0 -69
data/test/clusterers/k_means_test.rb +0 -100
data/test/clusterers/median_linkage_test.rb +0 -53
data/test/clusterers/single_linkage_test.rb +0 -122
data/test/clusterers/ward_linkage_test.rb +0 -53
data/test/clusterers/weighted_average_linkage_test.rb +0 -53
data/test/data/data_set_test.rb +0 -96
data/test/data/proximity_test.rb +0 -81
data/test/data/statistics_test.rb +0 -65
data/test/experiment/classifier_evaluator_test.rb +0 -76
data/test/genetic_algorithm/chromosome_test.rb +0 -57
data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
data/test/neural_network/backpropagation_test.rb +0 -82
data/test/neural_network/hopfield_test.rb +0 -72
data/test/som/som_test.rb +0 -97

data/lib/ai4r/classifiers/logistic_regression.rb ADDED Viewed

@@ -0,0 +1,96 @@
+# frozen_string_literal: true
+# Author::    OpenAI Assistant
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       https://github.com/SergioFierens/ai4r
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require_relative '../data/data_set'
+require_relative 'classifier'
+module Ai4r
+  module Classifiers
+    # Implementation of binary Logistic Regression using gradient descent.
+    #
+    # Training data must have numeric attributes with the last attribute being
+    # the class label (0 or 1). Parameters can be adjusted with
+    # {Parameterizable#set_parameters}.
+    #
+    # Example:
+    #   data = Ai4r::Data::DataSet.new(:data_items => [[0.2, 1], [0.4, 0]])
+    #   classifier = LogisticRegression.new.build(data)
+    #   classifier.eval([0.3])
+    class LogisticRegression < Classifier
+      attr_reader :weights
+      parameters_info learning_rate: 'Learning rate for gradient descent.',
+                      iterations: 'Number of iterations to train.'
+      def initialize
+        super()
+        @learning_rate = 0.1
+        @iterations = 1000
+        @weights = nil
+      end
+      # Train the logistic regression classifier using the provided dataset.
+      def build(data_set)
+        raise 'Error instance must be passed' unless data_set.is_a?(Ai4r::Data::DataSet)
+        data_set.check_not_empty
+        x = data_set.data_items.map { |item| item[0...-1].map(&:to_f) }
+        y = data_set.data_items.map { |item| item.last.to_f }
+        m = x.length
+        n = x.first.length
+        @weights = Array.new(n + 1, 0.0) # last value is bias
+        @iterations.times do
+          predictions = x.map do |row|
+            z = row.each_with_index.inject(@weights.last) { |s, (v, j)| s + (v * @weights[j]) }
+            1.0 / (1.0 + Math.exp(-z))
+          end
+          errors = predictions.zip(y).map { |p, label| p - label }
+          n.times do |j|
+            grad = (0...m).inject(0.0) { |sum, i| sum + (errors[i] * x[i][j]) } / m
+            @weights[j] -= @learning_rate * grad
+          end
+          bias_grad = errors.sum / m
+          @weights[n] -= @learning_rate * bias_grad
+        end
+        self
+      end
+      # Predict the class (0 or 1) for the given data array.
+      def eval(data)
+        raise 'Model not trained' unless @weights
+        expected_size = @weights.length - 1
+        if data.length != expected_size
+          raise ArgumentError,
+                "Wrong number of inputs. Expected: #{expected_size}, " \
+                "received: #{data.length}."
+        end
+        z = data.each_with_index.inject(@weights.last) do |s, (v, j)|
+          s + (v.to_f * @weights[j])
+        end
+        prob = 1.0 / (1.0 + Math.exp(-z))
+        prob >= 0.5 ? 1 : 0
+      end
+      # Logistic Regression classifiers cannot generate human readable rules.
+      #
+      # This method returns a string explaining that rule extraction is not
+      # supported for this algorithm.
+      def get_rules
+        'LogisticRegression does not support rule extraction.'
+      end
+    end
+  end
+end

data/lib/ai4r/classifiers/multilayer_perceptron.rb CHANGED Viewed

@@ -1,104 +1,118 @@
+# frozen_string_literal: true
 # Author::    Sergio Fierens (Implementation only)
 # License::   MPL 1.1
 # Project::   ai4r
-# Url::       http://ai4r.org/
+# Url::       https://github.com/SergioFierens/ai4r
 #
-# You can redistribute it and/or modify it under the terms of
-# the Mozilla Public License version 1.1  as published by the
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
-require File.dirname(__FILE__) + '/../data/data_set.rb'
-require File.dirname(__FILE__) + '/../classifiers/classifier'
-require File.dirname(__FILE__) + '/../neural_network/backpropagation'
+require_relative '../data/data_set'
+require_relative '../classifiers/classifier'
+require_relative '../neural_network/backpropagation'
 module Ai4r
   module Classifiers
     # = Introduction
-    #
-    # The idea behind the MultilayerPerceptron classifier is to
-    # train a Multilayer Perceptron neural network with the provided examples,
+    #
+    # The idea behind the MultilayerPerceptron classifier is to
+    # train a Multilayer Perceptron neural network with the provided examples,
     # and predict the class for new data items.
-    #
+    #
     # = Parameters
-    #
+    #
     # Use class method get_parameters_info to obtain details on the algorithm
     # parameters. Use set_parameters to set values for this parameters.
     # See Parameterizable module documentation.
-    #
-    # * :network_class => Neural network implementation class.
+    #
+    # * :network_class => Neural network implementation class.
     #   By default: Ai4r::NeuralNetwork::Backpropagation.
     # * :network_parameters => Parameters to be forwarded to the back end
-    #   neural ntework.
-    # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
+    #   neural ntework.
+    # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
     #   2 hidden layers with 8 and 6 neurons each. By default []
-    # * :training_iterations => How many times the training should be repeated.
-    #   By default: 1000.
-    # :active_node_value => Default: 1
+    # * :training_iterations => How many times the training should be repeated.
+    #   By default: 500.
+    # :active_node_value => Default: 1
     # :inactive_node_value => Default: 1
     class MultilayerPerceptron < Classifier
       attr_reader :data_set, :class_value, :network, :domains
-      parameters_info :network_class => "Neural network implementation class."+
-          "By default: Ai4r::NeuralNetwork::Backpropagation.",
-        :network_parameters => "parameters to be forwarded to the back end " +
-          "neural network.",
-        :hidden_layers => "Hidden layer structure. E.g. [8, 6] will generate " +
-          "2 hidden layers with 8 and 6 neurons each. By default []",
-        :training_iterations => "How many times the training should be " +
-          "repeated. By default: 1000",
-        :active_node_value => "Default: 1",
-        :inactive_node_value => "Default: 0"
+      TRAINING_ITERATIONS = 500
+      parameters_info network_class: 'Neural network implementation class.' \
+                                     'By default: Ai4r::NeuralNetwork::Backpropagation.',
+                      network_parameters: 'parameters to be forwarded to the back end ' \
+                                          'neural network.',
+                      hidden_layers: 'Hidden layer structure. E.g. [8, 6] will generate ' \
+                                     '2 hidden layers with 8 and 6 neurons each. By default []',
+                      training_iterations: 'How many times the training should be ' \
+                                           "repeated. By default: #{TRAINING_ITERATIONS}",
+                      active_node_value: 'Default: 1',
+                      inactive_node_value: 'Default: 0'
+      # @return [Object]
       def initialize
+        super()
         @network_class = Ai4r::NeuralNetwork::Backpropagation
         @hidden_layers = []
-        @training_iterations = 500
+        @training_iterations = TRAINING_ITERATIONS
         @network_parameters = {}
         @active_node_value = 1
         @inactive_node_value = 0
       end
-      # Build a new MultilayerPerceptron classifier. You must provide a DataSet
-      # instance as parameter. The last attribute of each item is considered as
+      # Build a new MultilayerPerceptron classifier. You must provide a DataSet
+      # instance as parameter. The last attribute of each item is considered as
       # the item class.
+      # @param data_set [Object]
+      # @return [Object]
       def build(data_set)
         data_set.check_not_empty
         @data_set = data_set
-        @domains = @data_set.build_domains.collect {|domain| domain.to_a}
+        @domains = @data_set.build_domains.collect(&:to_a)
         @outputs = @domains.last.length
         @inputs = 0
-        @domains[0...-1].each {|domain| @inputs += domain.length}
+        @domains[0...-1].each { |domain| @inputs += domain.length }
         @structure = [@inputs] + @hidden_layers + [@outputs]
         @network = @network_class.new @structure
-        @training_iterations.times do
-          data_set.data_items.each do |data_item|
-            input_values = data_to_input(data_item[0...-1])
-            output_values = data_to_output(data_item.last)
-            @network.train(input_values, output_values)
-          end
+        inputs = []
+        outputs = []
+        data_set.data_items.each do |data_item|
+          inputs << data_to_input(data_item[0...-1])
+          outputs << data_to_output(data_item.last)
         end
-        return self
+        @network.train_epochs(inputs, outputs,
+                              epochs: @training_iterations, batch_size: 1)
+        self
       end
+      # rubocop:enable Metrics/AbcSize
       # You can evaluate new data, predicting its class.
       # e.g.
       #   classifier.eval(['New York',  '<30', 'F'])  # => 'Y'
+      # @param data [Object]
+      # @return [Object]
       def eval(data)
         input_values = data_to_input(data)
         output_values = @network.eval(input_values)
-        return @domains.last[get_max_index(output_values)]
+        @domains.last[get_max_index(output_values)]
       end
-      # Multilayer Perceptron Classifiers cannot generate
+      # Multilayer Perceptron Classifiers cannot generate
       # human-readable rules.
+      # @return [Object]
       def get_rules
-        return "raise 'Neural networks classifiers do not generate human-readable rules.'"
+        "raise 'Neural networks classifiers do not generate human-readable rules.'"
       end
+      # rubocop:enable Naming/AccessorMethodName
       protected
+      # @param data_item [Object]
+      # @return [Object]
       def data_to_input(data_item)
         input_values = Array.new(@inputs, @inactive_node_value)
         accum_index = 0
@@ -106,17 +120,21 @@ module Ai4r
           att_value = data_item[att_index]
           domain_index = @domains[att_index].index(att_value)
           input_values[domain_index + accum_index] = @active_node_value
-          accum_index = @domains[att_index].length
+          accum_index += @domains[att_index].length
         end
-        return input_values
+        input_values
       end
+      # @param data_item [Object]
+      # @return [Object]
       def data_to_output(data_item)
         output_values = Array.new(@outputs, @inactive_node_value)
         output_values[@domains.last.index(data_item)] = @active_node_value
-        return output_values
+        output_values
       end
+      # @param output_values [Object]
+      # @return [Object]
       def get_max_index(output_values)
         max_value = @inactive_node_value
         max_index = 0
@@ -126,10 +144,8 @@ module Ai4r
             max_index = output_index
           end
         end
-        return max_index
+        max_index
       end
     end
   end
 end

data/lib/ai4r/classifiers/naive_bayes.rb CHANGED Viewed

@@ -1,19 +1,19 @@
+# frozen_string_literal: true
 # Author::    Thomas Kern
 # License::   MPL 1.1
 # Project::   ai4r
-# Url::       http://ai4r.org/
+# Url::       https://github.com/SergioFierens/ai4r
 #
 # You can redistribute it and/or modify it under the terms of
 # the Mozilla Public License version 1.1  as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
-require File.dirname(__FILE__) + '/../data/data_set'
-require File.dirname(__FILE__) + '/classifier'
+require_relative '../data/data_set'
+require_relative 'classifier'
 module Ai4r
   module Classifiers
     # = Introduction
     #
     # This is an implementation of a Naive Bayesian Classifier without any
@@ -21,7 +21,7 @@ module Ai4r
     # Probabilities P(a_i | v_j) are estimated using m-estimates, hence the
     # m parameter as second parameter when isntantiating the class.
     # The estimation looks like this:
-    #(n_c + mp) / (n + m)
+    # (n_c + mp) / (n + m)
     #
     # the variables are:
     # n = the number of training examples for which v = v_j
@@ -54,14 +54,21 @@ module Ai4r
     #     build data
     #   b.eval(["Red", "SUV", "Domestic"])
     #
+    # Probabilistic classifier based on Bayes' theorem.
     class NaiveBayes < Classifier
+      attr_reader :class_prob, :pcc, :pcp
-      parameters_info :m => "Default value is set to 0. It may be set to a value greater than " +
-        "0 when the size of the dataset is relatively small"
+      parameters_info m: 'Default value is set to 0. It may be set to a value greater than ' \
+                         '0 when the size of the dataset is relatively small',
+                      unknown_value_strategy: 'Behaviour when evaluating unseen attribute values: ' \
+                                              ':ignore (default), :uniform or :error.'
+      # @return [Object]
       def initialize
+        super()
         @m = 0
+        @unknown_value_strategy = :ignore
         @class_counts = []
         @class_prob = [] # stores the probability of the classes
         @pcc = [] # stores the number of instances divided into attribute/value/class
@@ -69,144 +76,199 @@ module Ai4r
         @klass_index = {} # hashmap for quick lookup of all the used klasses and their indice
         @values = {} # hashmap for quick lookup of all the values
       end
       # You can evaluate new data, predicting its category.
       # e.g.
       #   b.eval(["Red", "SUV", "Domestic"])
       #     => 'No'
+      # @param data [Object]
+      # @return [Object]
       def eval(data)
-        prob = @class_prob.map {|cp| cp}
+        prob = @class_prob.dup
         prob = calculate_class_probabilities_for_entry(data, prob)
         index_to_klass(prob.index(prob.max))
       end
       # Calculates the probabilities for the data entry Data.
       # data has to be an array of the same dimension as the training data minus the
-      # class column.
+      # class column.
       # Returns a map containint all classes as keys:
       # {Class_1 => probability, Class_2 => probability2 ... }
       # Probability is <= 1 and of type Float.
       # e.g.
       #   b.get_probability_map(["Red", "SUV", "Domestic"])
       #     => {"Yes"=>0.4166666666666667, "No"=>0.5833333333333334}
+      # @param data [Object]
+      # @return [Object]
       def get_probability_map(data)
-        prob = @class_prob.map {|cp| cp}
+        prob = @class_prob.dup
         prob = calculate_class_probabilities_for_entry(data, prob)
         prob = normalize_class_probability prob
         probability_map = {}
         prob.each_with_index { |p, i| probability_map[index_to_klass(i)] = p }
-        return probability_map
+        probability_map
       end
       # counts values of the attribute instances and calculates the probability of the classes
       # and the conditional probabilities
       # Parameter data has to be an instance of CsvDataSet
+      # @param data [Object]
+      # @return [Object]
       def build(data)
-        raise "Error instance must be passed" unless data.is_a?(DataSet)
-        raise "Data should not be empty" if data.data_items.length == 0
+        raise 'Error instance must be passed' unless data.is_a?(Ai4r::Data::DataSet)
+        raise 'Data should not be empty' if data.data_items.empty?
         initialize_domain_data(data)
         initialize_klass_index
         initialize_pc
         calculate_probabilities
-        return self
+        self
+      end
+      # Naive Bayes classifiers cannot generate human readable rules.
+      # This method returns a descriptive string explaining that rule
+      # extraction is not supported for this algorithm.
+      def get_rules
+        'NaiveBayes does not support rule extraction.'
       end
       private
+      # @param data [Object]
+      # @return [Object]
       def initialize_domain_data(data)
         @domains = data.build_domains
         @data_items = data.data_items.map { |item| DataEntry.new(item[0...-1], item.last) }
         @data_labels = data.data_labels[0...-1]
-        @klasses = @domains.last.to_a
+        @klasses = @domains.last.to_a.sort
       end
       # calculates the klass probability of a data entry
       # as usual, the probability of the value is multiplied with every conditional
       # probability of every attribute in condition to a specific class
       # this is repeated for every class
+      # @param data [Object]
+      # @param prob [Object]
+      # @return [Object]
       def calculate_class_probabilities_for_entry(data, prob)
-        prob.each_with_index do |prob_entry, prob_index|
+        0.upto(prob.length - 1) do |prob_index|
           data.each_with_index do |att, index|
-            next if value_index(att, index).nil?
-            prob[prob_index] *= @pcp[index][value_index(att, index)][prob_index]
+            val_index = value_index(att, index)
+            if val_index.nil?
+              case @unknown_value_strategy
+              when :ignore
+                next
+              when :uniform
+                value_count = @pcc[index].count { |arr| arr[prob_index].positive? }
+                value_count = 1 if value_count.zero?
+                prob[prob_index] *= 1.0 / value_count
+              when :error
+                raise "Unknown value '#{att}' for attribute #{@data_labels[index]}"
+              else
+                next
+              end
+            else
+              prob[prob_index] *= @pcp[index][val_index][prob_index]
+            end
           end
+          # rubocop:enable Metrics/ClassLength
         end
+        prob
       end
       # normalises the array of probabilities so the sum of the array equals 1
+      # @param prob [Object]
+      # @return [Object]
       def normalize_class_probability(prob)
         prob_sum = sum(prob)
-        prob_sum > 0 ?
-          prob.map {|prob_entry| prob_entry / prob_sum } :
+        if prob_sum.positive?
+          prob.map { |prob_entry| prob_entry / prob_sum }
+        else
           prob
+        end
       end
       # sums an array up; returns a number of type Float
+      # @param array [Object]
+      # @return [Object]
       def sum(array)
-        array.inject(0.0){|b, i| b+i}
+        array.sum(0.0)
       end
       # returns the name of the class when the index is found
+      # @param index [Object]
+      # @return [Object]
       def index_to_klass(index)
-        @klass_index.has_value?(index) ? @klass_index.key(index) : nil
+        @klass_index.value?(index) ? @klass_index.key(index) : nil
       end
       # initializes @values and @klass_index; maps a certain value to a uniq index
+      # @return [Object]
       def initialize_klass_index
         @klasses.each_with_index do |dl, index|
           @klass_index[dl] = index
         end
-        @data_labels.each_with_index do |dl, index|
+        0.upto(@data_labels.length - 1) do |index|
           @values[index] = {}
-          @domains[index].each_with_index do |d, d_index|
+          @domains[index].to_a.sort.each_with_index do |d, d_index|
             @values[index][d] = d_index
           end
         end
       end
       # returns the index of a class
+      # @param klass [Object]
+      # @return [Object]
       def klass_index(klass)
         @klass_index[klass]
       end
       # returns the index of a value, depending on the attribute index
+      # @param value [Object]
+      # @param dl_index [Object]
+      # @return [Object]
       def value_index(value, dl_index)
         @values[dl_index][value]
       end
       # builds an array of the form:
       # array[attributes][values][classes]
-      def build_array(dl, index)
+      # @param index [Object]
+      # @return [Object]
+      def build_array(index)
         domains = Array.new(@domains[index].length)
-        domains.map do |p1|
-          pl = Array.new @klasses.length, 0
+        domains.map do
+          Array.new @klasses.length, 0
         end
       end
       # initializes the two array for storing the count and conditional probabilities of
       # the attributes
+      # @return [Object]
       def initialize_pc
-        @data_labels.each_with_index do |dl, index|
-          @pcc << build_array(dl, index)
-          @pcp << build_array(dl, index)
+        0.upto(@data_labels.length - 1) do |index|
+          @pcc << build_array(index)
+          @pcp << build_array(index)
         end
       end
       # calculates the occurrences of a class and the instances of a certain value of a
       # certain attribute and the assigned class.
       # In addition to that, it also calculates the conditional probabilities and values
+      # @return [Object]
       def calculate_probabilities
-        @klasses.each {|dl| @class_counts[klass_index(dl)] = 0}
+        @klasses.each { |dl| @class_counts[klass_index(dl)] = 0 }
         calculate_class_probabilities
         count_instances
         calculate_conditional_probabilities
       end
+      # @return [Object]
       def calculate_class_probabilities
         @data_items.each do |entry|
           @class_counts[klass_index(entry.klass)] += 1
@@ -218,48 +280,50 @@ module Ai4r
       end
       # counts the instances of a certain value of a certain attribute and the assigned class
+      # @return [Object]
       def count_instances
         @data_items.each do |item|
-          @data_labels.each_with_index do |dl, dl_index|
+          0.upto(@data_labels.length - 1) do |dl_index|
             @pcc[dl_index][value_index(item[dl_index], dl_index)][klass_index(item.klass)] += 1
           end
         end
       end
       # calculates the conditional probability and stores it in the @pcp-array
+      # @return [Object]
       def calculate_conditional_probabilities
         @pcc.each_with_index do |attributes, a_index|
           attributes.each_with_index do |values, v_index|
             values.each_with_index do |klass, k_index|
-              @pcp[a_index][v_index][k_index] = (klass.to_f + @m * @class_prob[k_index]) / (@class_counts[k_index] + @m).to_f
+              @pcp[a_index][v_index][k_index] =
+                (klass.to_f + (@m * @class_prob[k_index])) / (@class_counts[k_index] + @m)
             end
           end
         end
       end
-      #DataEntry stores the instance of the data entry
-      #the data is accessible via entries
-      #stores the class-column in the attribute klass and
-      #removes the column for the class-entry
+      # DataEntry stores the instance of the data entry
+      # the data is accessible via entries
+      # stores the class-column in the attribute klass and
+      # removes the column for the class-entry
       class DataEntry
         attr_accessor :klass, :entries
+        # @param attributes [Object]
+        # @param klass [Object]
+        # @return [Object]
         def initialize(attributes, klass)
           @klass = klass
           @entries = attributes
         end
         # wrapper method for the access to @entries
+        # @param index [Object]
+        # @return [Object]
         def [](index)
           @entries[index]
         end
       end
     end
   end
 end
-# Monkeypatch to support both ruby 1.8 and 1.9 (key vs index method)
-class Hash
-  alias_method(:key, :index) unless method_defined?(:key)
-end