ai4ruby 1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. data/README.rdoc +47 -0
  2. data/examples/classifiers/id3_data.csv +121 -0
  3. data/examples/classifiers/id3_example.rb +29 -0
  4. data/examples/classifiers/naive_bayes_data.csv +11 -0
  5. data/examples/classifiers/naive_bayes_example.rb +16 -0
  6. data/examples/classifiers/results.txt +31 -0
  7. data/examples/genetic_algorithm/genetic_algorithm_example.rb +37 -0
  8. data/examples/genetic_algorithm/travel_cost.csv +16 -0
  9. data/examples/neural_network/backpropagation_example.rb +67 -0
  10. data/examples/neural_network/patterns_with_base_noise.rb +68 -0
  11. data/examples/neural_network/patterns_with_noise.rb +66 -0
  12. data/examples/neural_network/training_patterns.rb +68 -0
  13. data/examples/neural_network/xor_example.rb +35 -0
  14. data/examples/som/som_data.rb +156 -0
  15. data/examples/som/som_multi_node_example.rb +22 -0
  16. data/examples/som/som_single_example.rb +24 -0
  17. data/lib/ai4r.rb +33 -0
  18. data/lib/ai4r/classifiers/classifier.rb +62 -0
  19. data/lib/ai4r/classifiers/hyperpipes.rb +118 -0
  20. data/lib/ai4r/classifiers/ib1.rb +121 -0
  21. data/lib/ai4r/classifiers/id3.rb +326 -0
  22. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  23. data/lib/ai4r/classifiers/naive_bayes.rb +259 -0
  24. data/lib/ai4r/classifiers/one_r.rb +110 -0
  25. data/lib/ai4r/classifiers/prism.rb +197 -0
  26. data/lib/ai4r/classifiers/zero_r.rb +73 -0
  27. data/lib/ai4r/clusterers/average_linkage.rb +59 -0
  28. data/lib/ai4r/clusterers/bisecting_k_means.rb +93 -0
  29. data/lib/ai4r/clusterers/centroid_linkage.rb +66 -0
  30. data/lib/ai4r/clusterers/clusterer.rb +61 -0
  31. data/lib/ai4r/clusterers/complete_linkage.rb +67 -0
  32. data/lib/ai4r/clusterers/diana.rb +139 -0
  33. data/lib/ai4r/clusterers/k_means.rb +126 -0
  34. data/lib/ai4r/clusterers/median_linkage.rb +61 -0
  35. data/lib/ai4r/clusterers/single_linkage.rb +194 -0
  36. data/lib/ai4r/clusterers/ward_linkage.rb +64 -0
  37. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +31 -0
  38. data/lib/ai4r/clusterers/weighted_average_linkage.rb +61 -0
  39. data/lib/ai4r/data/data_set.rb +266 -0
  40. data/lib/ai4r/data/parameterizable.rb +64 -0
  41. data/lib/ai4r/data/proximity.rb +100 -0
  42. data/lib/ai4r/data/statistics.rb +77 -0
  43. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  44. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +270 -0
  45. data/lib/ai4r/neural_network/backpropagation.rb +326 -0
  46. data/lib/ai4r/neural_network/hopfield.rb +149 -0
  47. data/lib/ai4r/som/layer.rb +68 -0
  48. data/lib/ai4r/som/node.rb +96 -0
  49. data/lib/ai4r/som/som.rb +155 -0
  50. data/lib/ai4r/som/two_phase_layer.rb +90 -0
  51. data/test/classifiers/hyperpipes_test.rb +84 -0
  52. data/test/classifiers/ib1_test.rb +78 -0
  53. data/test/classifiers/id3_test.rb +208 -0
  54. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  55. data/test/classifiers/naive_bayes_test.rb +43 -0
  56. data/test/classifiers/one_r_test.rb +62 -0
  57. data/test/classifiers/prism_test.rb +85 -0
  58. data/test/classifiers/zero_r_test.rb +49 -0
  59. data/test/clusterers/average_linkage_test.rb +51 -0
  60. data/test/clusterers/bisecting_k_means_test.rb +66 -0
  61. data/test/clusterers/centroid_linkage_test.rb +53 -0
  62. data/test/clusterers/complete_linkage_test.rb +57 -0
  63. data/test/clusterers/diana_test.rb +69 -0
  64. data/test/clusterers/k_means_test.rb +100 -0
  65. data/test/clusterers/median_linkage_test.rb +53 -0
  66. data/test/clusterers/single_linkage_test.rb +122 -0
  67. data/test/clusterers/ward_linkage_hierarchical_test.rb +61 -0
  68. data/test/clusterers/ward_linkage_test.rb +53 -0
  69. data/test/clusterers/weighted_average_linkage_test.rb +53 -0
  70. data/test/data/data_set_test.rb +96 -0
  71. data/test/data/proximity_test.rb +81 -0
  72. data/test/data/statistics_test.rb +65 -0
  73. data/test/experiment/classifier_evaluator_test.rb +76 -0
  74. data/test/genetic_algorithm/chromosome_test.rb +58 -0
  75. data/test/genetic_algorithm/genetic_algorithm_test.rb +81 -0
  76. data/test/neural_network/backpropagation_test.rb +82 -0
  77. data/test/neural_network/hopfield_test.rb +72 -0
  78. data/test/som/som_test.rb +97 -0
  79. metadata +168 -0
data/lib/ai4r/neural_network/backpropagation.rb
@@ -0,0 +1,326 @@
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ require File.dirname(__FILE__) + '/../data/parameterizable'
+
+ module Ai4r
+
+   # Artificial Neural Networks are mathematical or computational models based on
+   # biological neural networks.
+   #
+   # More about neural networks:
+   #
+   # * http://en.wikipedia.org/wiki/Artificial_neural_network
+   #
+   module NeuralNetwork
+
+     # = Introduction
+     #
+     # This is an implementation of a multilayer perceptron network, using
+     # the backpropagation algorithm for learning.
+     #
+     # Backpropagation is a supervised learning technique (described
+     # by Paul Werbos in 1974, and further developed by David E.
+     # Rumelhart, Geoffrey E. Hinton and Ronald J. Williams in 1986)
+     #
+     # = Features
+     #
+     # * Support for any network architecture (number of layers and neurons)
+     # * Configurable propagation function
+     # * Optional usage of bias
+     # * Configurable momentum
+     # * Configurable learning rate
+     # * Configurable initial weight function
+     # * 100% ruby code, no external dependency
+     #
+     # = Parameters
+     #
+     # Use class method get_parameters_info to obtain details on the algorithm
+     # parameters. Use set_parameters to set values for these parameters.
+     #
+     # * :disable_bias => If true, the algorithm will not use bias nodes.
+     #   False by default.
+     # * :initial_weight_function => f(n, i, j) must return the initial
+     #   weight for the connection between the node i in layer n, and node j in
+     #   layer n+1. By default a random number in [-1, 1) range.
+     # * :propagation_function => By default:
+     #   lambda { |x| 1/(1+Math.exp(-1*(x))) }
+     # * :derivative_propagation_function => Derivative of the propagation
+     #   function, based on propagation function output.
+     #   By default: lambda { |y| y*(1-y) }, where y=propagation_function(x)
+     # * :learning_rate => By default 0.25
+     # * :momentum => By default 0.1. Set this parameter to 0 to disable
+     #   momentum.
+     #
+     # = How to use it
+     #
+     #   # Create the network with 4 inputs, 1 hidden layer with 3 neurons,
+     #   # and 2 outputs
+     #   net = Ai4r::NeuralNetwork::Backpropagation.new([4, 3, 2])
+     #
+     #   # Train the network
+     #   1000.times do |i|
+     #     net.train(example[i], result[i])
+     #   end
+     #
+     #   # Use it: Evaluate data with the trained network
+     #   net.eval([12, 48, 12, 25])
+     #     => [0.86, 0.01]
+     #
+     # More about multilayer perceptron neural networks and backpropagation:
+     #
+     # * http://en.wikipedia.org/wiki/Backpropagation
+     # * http://en.wikipedia.org/wiki/Multilayer_perceptron
+     #
+     # = About the project
+     # Author:: Sergio Fierens
+     # License:: MPL 1.1
+     # Url:: http://ai4r.rubyforge.org
+     class Backpropagation
+
+       include Ai4r::Data::Parameterizable
+
+       parameters_info :disable_bias => "If true, the algorithm will not use "+
+             "bias nodes. False by default.",
+           :initial_weight_function => "f(n, i, j) must return the initial "+
+             "weight for the connection between the node i in layer n, and "+
+             "node j in layer n+1. By default a random number in [-1, 1) range.",
+           :propagation_function => "By default: " +
+             "lambda { |x| 1/(1+Math.exp(-1*(x))) }",
+           :derivative_propagation_function => "Derivative of the propagation "+
+             "function, based on propagation function output. By default: " +
+             "lambda { |y| y*(1-y) }, where y=propagation_function(x)",
+           :learning_rate => "By default 0.25",
+           :momentum => "By default 0.1. Set this parameter to 0 to disable "+
+             "momentum."
+
+       attr_accessor :structure, :weights, :activation_nodes, :last_changes
+
+       # Creates a new network specifying its architecture.
+       # E.g.
+       #
+       #   net = Backpropagation.new([4, 3, 2])    # 4 inputs
+       #                                           # 1 hidden layer with 3 neurons,
+       #                                           # 2 outputs
+       #   net = Backpropagation.new([2, 3, 3, 4]) # 2 inputs
+       #                                           # 2 hidden layers with 3 neurons each,
+       #                                           # 4 outputs
+       #   net = Backpropagation.new([2, 1])       # 2 inputs
+       #                                           # No hidden layer
+       #                                           # 1 output
+       def initialize(network_structure)
+         @structure = network_structure
+         @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
+         @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
+         @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
+         @disable_bias = false
+         @learning_rate = 0.25
+         @momentum = 0.1
+       end
+
+       # Evaluates the input.
+       # E.g.
+       #   net = Backpropagation.new([4, 3, 2])
+       #   net.eval([25, 32.3, 12.8, 1.5])
+       #     # => [0.83, 0.03]
+       def eval(input_values)
+         check_input_dimension(input_values.length)
+         init_network if !@weights
+         feedforward(input_values)
+         return @activation_nodes.last.clone
+       end
+
+       # This method trains the network using the backpropagation algorithm.
+       #
+       # input: The network's input.
+       #
+       # output: Expected output for the given input.
+       #
+       # This method returns the network error:
+       # => 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+       def train(inputs, outputs)
+         eval(inputs)
+         backpropagate(outputs)
+         calculate_error(outputs)
+       end
+
+       # Initialize (or reset) activation nodes and weights, with the
+       # provided net structure and parameters.
+       def init_network
+         init_activation_nodes
+         init_weights
+         init_last_changes
+         return self
+       end
+
+       protected
+
+       # Custom serialization. Marshaling used to fail because the instance
+       # holds lambda functions internally, and lambdas cannot be serialized.
+       # Now it does not fail, but if you customize the values of
+       # * initial_weight_function
+       # * propagation_function
+       # * derivative_propagation_function
+       # you must restore their values manually after loading the instance.
+       def marshal_dump
+         [
+           @structure,
+           @disable_bias,
+           @learning_rate,
+           @momentum,
+           @weights,
+           @last_changes,
+           @activation_nodes
+         ]
+       end
+
+       def marshal_load(ary)
+         @structure,
+           @disable_bias,
+           @learning_rate,
+           @momentum,
+           @weights,
+           @last_changes,
+           @activation_nodes = ary
+         @initial_weight_function = lambda { |n, i, j| ((rand 2000)/1000.0) - 1}
+         @propagation_function = lambda { |x| 1/(1+Math.exp(-1*(x))) } #lambda { |x| Math.tanh(x) }
+         @derivative_propagation_function = lambda { |y| y*(1-y) } #lambda { |y| 1.0 - y**2 }
+       end
+
+
+
+       # Propagate error backwards
+       def backpropagate(expected_output_values)
+         check_output_dimension(expected_output_values.length)
+         calculate_output_deltas(expected_output_values)
+         calculate_internal_deltas
+         update_weights
+       end
+
+       # Propagate values forward
+       def feedforward(input_values)
+         input_values.each_index do |input_index|
+           @activation_nodes.first[input_index] = input_values[input_index]
+         end
+         @weights.each_index do |n|
+           @structure[n+1].times do |j|
+             sum = 0.0
+             @activation_nodes[n].each_index do |i|
+               sum += (@activation_nodes[n][i] * @weights[n][i][j])
+             end
+             @activation_nodes[n+1][j] = @propagation_function.call(sum)
+           end
+         end
+       end
+
+       # Initialize the neuron structure.
+       def init_activation_nodes
+         @activation_nodes = Array.new(@structure.length) do |n|
+           Array.new(@structure[n], 1.0)
+         end
+         if not disable_bias
+           @activation_nodes[0...-1].each {|layer| layer << 1.0 }
+         end
+       end
+
+       # Initialize the weight arrays using the function specified with the
+       # initial_weight_function parameter
+       def init_weights
+         @weights = Array.new(@structure.length-1) do |i|
+           nodes_origin = @activation_nodes[i].length
+           nodes_target = @structure[i+1]
+           Array.new(nodes_origin) do |j|
+             Array.new(nodes_target) do |k|
+               @initial_weight_function.call(i, j, k)
+             end
+           end
+         end
+       end
+
+       # Momentum usage needs to know how much a weight changed in the
+       # previous training. This method initializes the @last_changes
+       # structure with 0 values.
+       def init_last_changes
+         @last_changes = Array.new(@weights.length) do |w|
+           Array.new(@weights[w].length) do |i|
+             Array.new(@weights[w][i].length, 0.0)
+           end
+         end
+       end
+
+       # Calculate deltas for output layer
+       def calculate_output_deltas(expected_values)
+         output_values = @activation_nodes.last
+         output_deltas = []
+         output_values.each_index do |output_index|
+           error = expected_values[output_index] - output_values[output_index]
+           output_deltas << @derivative_propagation_function.call(
+             output_values[output_index]) * error
+         end
+         @deltas = [output_deltas]
+       end
+
+       # Calculate deltas for hidden layers
+       def calculate_internal_deltas
+         prev_deltas = @deltas.last
+         (@activation_nodes.length-2).downto(1) do |layer_index|
+           layer_deltas = []
+           @activation_nodes[layer_index].each_index do |j|
+             error = 0.0
+             @structure[layer_index+1].times do |k|
+               error += prev_deltas[k] * @weights[layer_index][j][k]
+             end
+             layer_deltas[j] = (@derivative_propagation_function.call(
+               @activation_nodes[layer_index][j]) * error)
+           end
+           prev_deltas = layer_deltas
+           @deltas.unshift(layer_deltas)
+         end
+       end
+
+       # Update weights after @deltas have been calculated.
+       def update_weights
+         (@weights.length-1).downto(0) do |n|
+           @weights[n].each_index do |i|
+             @weights[n][i].each_index do |j|
+               change = @deltas[n][j]*@activation_nodes[n][i]
+               @weights[n][i][j] += ( learning_rate * change +
+                 momentum * @last_changes[n][i][j])
+               @last_changes[n][i][j] = change
+             end
+           end
+         end
+       end
+
+       # Calculate the quadratic error for an expected output value
+       # Error = 0.5 * sum( (expected_value[i] - output_value[i])**2 )
+       def calculate_error(expected_output)
+         output_values = @activation_nodes.last
+         error = 0.0
+         expected_output.each_index do |output_index|
+           error +=
+             0.5*(output_values[output_index]-expected_output[output_index])**2
+         end
+         return error
+       end
+
+       def check_input_dimension(inputs)
+         raise ArgumentError, "Wrong number of inputs. " +
+           "Expected: #{@structure.first}, " +
+           "received: #{inputs}." if inputs!=@structure.first
+       end
+
+       def check_output_dimension(outputs)
+         raise ArgumentError, "Wrong number of outputs. " +
+           "Expected: #{@structure.last}, " +
+           "received: #{outputs}." if outputs!=@structure.last
+       end
+
+     end
+   end
+ end
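
For reference, a minimal usage sketch based on the API shown in the diff above. The package ships a fuller example in data/examples/neural_network/xor_example.rb; the epoch count, logging interval, and variable names here are illustrative assumptions, not part of the package:

    require 'ai4r'

    # 2 inputs, one hidden layer with 3 neurons, 1 output.
    net = Ai4r::NeuralNetwork::Backpropagation.new([2, 3, 1])

    # Truth table for XOR.
    examples = [[0, 0], [0, 1], [1, 0], [1, 1]]
    results  = [[0],    [1],    [1],    [0]]

    # train returns the quadratic error 0.5 * sum((expected - output)**2),
    # so we can watch it shrink as training progresses.
    5000.times do |epoch|
      error = 0.0
      examples.each_index { |i| error += net.train(examples[i], results[i]) }
      puts "epoch #{epoch}: error #{error}" if epoch % 1000 == 0
    end

    examples.each { |e| puts "#{e.inspect} => #{net.eval(e).inspect}" }
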
data/lib/ai4r/neural_network/hopfield.rb
@@ -0,0 +1,149 @@
+ # Author:: Sergio Fierens
+ # License:: MPL 1.1
+ # Project:: ai4r
+ # Url:: http://ai4r.rubyforge.org/
+ #
+ # You can redistribute it and/or modify it under the terms of
+ # the Mozilla Public License version 1.1 as published by the
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+
+ require File.dirname(__FILE__) + '/../data/parameterizable'
+
+ module Ai4r
+
+   module NeuralNetwork
+
+     # = Hopfield Net
+     #
+     # A Hopfield Network is a recurrent Artificial Neural Network.
+     # Hopfield nets are able to memorize a set of patterns, and then evaluate
+     # an input, returning the most similar stored pattern (although
+     # convergence to one of the stored patterns is not guaranteed).
+     # Hopfield nets are great at dealing with input noise. If a system accepts a
+     # discrete set of inputs, but inputs are subject to noise, you can use a
+     # Hopfield net to eliminate noise and identify the given input.
+     #
+     # = How to Use
+     #
+     #   data_set = Ai4r::Data::DataSet.new :data_items => array_of_patterns
+     #   net = Ai4r::NeuralNetwork::Hopfield.new.train data_set
+     #   net.eval input
+     #     # => one of the stored patterns in array_of_patterns
+     class Hopfield
+
+       include Ai4r::Data::Parameterizable
+
+       attr_reader :weights, :nodes
+
+       parameters_info :eval_iterations => "The network will run for a maximum "+
+             "of 'eval_iterations' iterations while evaluating an input. 500 by " +
+             "default.",
+           :active_node_value => "Default: 1",
+           :inactive_node_value => "Default: -1",
+           :threshold => "Default: 0"
+
+       def initialize
+         @eval_iterations = 500
+         @active_node_value = 1
+         @inactive_node_value = -1
+         @threshold = 0
+       end
+
+       # Prepares the network to memorize the given data set.
+       # Future calls to eval (should) return one of the memorized data items.
+       # A Hopfield network converges to a local minimum, but convergence to
+       # one of the "memorized" patterns is not guaranteed.
+       def train(data_set)
+         @data_set = data_set
+         initialize_nodes(@data_set)
+         initialize_weights(@data_set)
+         return self
+       end
+
+       # You can use run instead of eval to propagate values step by step.
+       # With this you can verify the progress of the network output with
+       # each step.
+       #
+       # E.g.:
+       #   pattern = input
+       #   100.times do
+       #     pattern = net.run(pattern)
+       #     puts pattern.inspect
+       #   end
+       def run(input)
+         set_input(input)
+         propagate
+         return @nodes
+       end
+
+       # Propagates the input until the network returns one of the memorized
+       # patterns, or a maximum of "eval_iterations" times.
+       def eval(input)
+         set_input(input)
+         @eval_iterations.times do
+           propagate
+           break if @data_set.data_items.include?(@nodes)
+         end
+         return @nodes
+       end
+
+
+       protected
+       # Set the state of all nodes to the given input.
+       # The inputs parameter must have the same dimension as nodes.
+       def set_input(inputs)
+         raise ArgumentError unless inputs.length == @nodes.length
+         inputs.each_with_index { |input, i| @nodes[i] = input}
+       end
+
+       # Select a single node at random and update its state from the
+       # weighted sum of the states of all other nodes.
+       def propagate
+         sum = 0
+         i = (rand * @nodes.length).floor
+         @nodes.each_with_index {|node, j| sum += read_weight(i,j)*node }
+         @nodes[i] = (sum > @threshold) ? @active_node_value : @inactive_node_value
+       end
+
+       # Initialize all nodes with "inactive" state.
+       def initialize_nodes(data_set)
+         @nodes = Array.new(data_set.data_items.first.length,
+           @inactive_node_value)
+       end
+
+       # Create a partial weight matrix:
+       #   [
+       #     [w(1,0)],
+       #     [w(2,0)], [w(2,1)],
+       #     [w(3,0)], [w(3,1)], [w(3,2)],
+       #     ...
+       #     [w(n-1,0)], [w(n-1,1)], [w(n-1,2)], ... , [w(n-1,n-2)]
+       #   ]
+       # where n is the number of nodes.
+       #
+       # We are saving memory here, as:
+       #
+       # * w[i][i] = 0 (no node connects with itself)
+       # * w[i][j] = w[j][i] (weights are symmetric)
+       #
+       # Use read_weight(i,j) to find out the weight between node i and j
+       def initialize_weights(data_set)
+         @weights = Array.new(@nodes.length-1) {|l| Array.new(l+1)}
+         @nodes.each_index do |i|
+           i.times do |j|
+             @weights[i-1][j] = data_set.data_items.inject(0) { |sum, item| sum += item[i]*item[j] }
+           end
+         end
+       end
+
+       # read_weight(i,j) reads the weight matrix and returns the weight
+       # between node i and j
+       def read_weight(index_a, index_b)
+         return 0 if index_a == index_b
+         index_a, index_b = index_b, index_a if index_b > index_a
+         return @weights[index_a-1][index_b]
+       end
+
+     end
+
+   end
+
+ end
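
Likewise, a minimal sketch of the Hopfield API shown above. The five-element patterns and the flipped bit are illustrative assumptions; the node values 1 and -1 are the defaults set by active_node_value and inactive_node_value in the constructor:

    require 'ai4r'

    # Two patterns to memorize, using the default 1 / -1 node values.
    patterns = [
      [ 1,  1,  1, -1, -1],
      [-1, -1,  1,  1,  1]
    ]
    data_set = Ai4r::Data::DataSet.new :data_items => patterns
    net = Ai4r::NeuralNetwork::Hopfield.new.train data_set

    # Evaluate a noisy copy of the first pattern (one value flipped).
    # eval should settle on a stored pattern, typically [1, 1, 1, -1, -1],
    # though convergence to a memorized pattern is not guaranteed.
    puts net.eval([1, -1, 1, -1, -1]).inspect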