RubyGems - ai4r - Versions diffs - 1.3 → 1.4 - Mend

ai4r 1.3 → 1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35)

data/README.rdoc +6 -12
data/examples/neural_network/backpropagation_example.rb +18 -16
data/examples/neural_network/xor_example.rb +30 -20
data/lib/ai4r/classifiers/classifier.rb +15 -4
data/lib/ai4r/classifiers/id3.rb +31 -31
data/lib/ai4r/clusterers/clusterer.rb +5 -24
data/lib/ai4r/clusterers/k_means.rb +7 -38
data/lib/ai4r/data/data_set.rb +4 -2
data/lib/ai4r/data/parameterizable.rb +64 -0
data/lib/ai4r/neural_network/backpropagation.rb +233 -210
data/site/build/site/en/downloads.html +3 -3
data/site/build/site/en/geneticAlgorithms.html +3 -3
data/site/build/site/en/index.html +32 -15
data/site/build/site/en/index.pdf +126 -100
data/site/build/site/en/linkmap.html +7 -9
data/site/build/site/en/linkmap.pdf +12 -12
data/site/build/site/en/machineLearning.html +7 -6
data/site/build/site/en/machineLearning.pdf +29 -29
data/site/build/site/en/neuralNetworks.html +164 -127
data/site/build/site/en/neuralNetworks.pdf +267 -200
data/site/build/site/en/svn.html +4 -4
data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
data/site/build/tmp/projfilters.properties +1 -1
data/site/build/webapp/WEB-INF/logs/core.log +670 -489
data/site/build/webapp/WEB-INF/logs/error.log +213 -364
data/site/build/webapp/WEB-INF/logs/sitemap.log +0 -368
data/site/src/documentation/content/xdocs/index.xml +1 -1
data/site/src/documentation/content/xdocs/neuralNetworks.xml +118 -90
data/site/src/documentation/content/xdocs/site.xml +2 -3
data/test/neural_network/backpropagation_test.rb +23 -0
metadata +5 -7
data/site/build/site/en/forum.html +0 -197
data/site/build/site/en/forum.pdf +0 -151
data/site/build/site/en/wholesite.pdf +0 -1915

data/README.rdoc CHANGED

@@ -16,18 +16,12 @@ http://ai4r.rubyforge.org
 2. Include require statements in your code:
   require "rubygems"
-  require "ai4r/classifiers/id3"en
-  require "ai4r/classifiers/prism"
-  require "ai4r/classifiers/one_r"
-  require "ai4r/classifiers/zero_r"
-  require "ai4r/neural_network/backpropagation"
-  require "ai4r/genetic_algorithm/genetic_algorithm"
-= Feedback
-If you have questions or constructive comments about this project,
-please post them in the forum. If you do not want to make it public,
-send it to me: Sergio Fierens (sergio(dot)fierens(at)gmail(dot)com)
+  require "ai4r"
+= More Info
+ * AI4R wiki: http://wiki.jadeferret.com/Category:AI4R
+ * AI4R Project site: http://ai4r.rubyforge.org
 = Warranty

data/examples/neural_network/backpropagation_example.rb CHANGED

@@ -15,25 +15,28 @@ require 'benchmark'
 times = Benchmark.measure do
+    srand 1
     net = Ai4r::NeuralNetwork::Backpropagation.new([256, 3])
+    tr_input = TRIANGLE.flatten.collect { |input| input.to_f / 5.0}
+    sq_input = SQUARE.flatten.collect { |input| input.to_f / 5.0}
+    cr_input = CROSS.flatten.collect { |input| input.to_f / 5.0}
-    tr_input = TRIANGLE.flatten.collect { |input| input.to_f / 10}
-    sq_input = SQUARE.flatten.collect { |input| input.to_f / 10}
-    cr_input = CROSS.flatten.collect { |input| input.to_f / 10}
-    tr_with_noise = TRIANGLE_WITH_NOISE.flatten.collect { |input| input.to_f / 10}
-    sq_with_noise = SQUARE_WITH_NOISE.flatten.collect { |input| input.to_f / 10}
-    cr_with_noise = CROSS_WITH_NOISE.flatten.collect { |input| input.to_f / 10}
+    tr_with_noise = TRIANGLE_WITH_NOISE.flatten.collect { |input| input.to_f / 5.0}
+    sq_with_noise = SQUARE_WITH_NOISE.flatten.collect { |input| input.to_f / 5.0}
+    cr_with_noise = CROSS_WITH_NOISE.flatten.collect { |input| input.to_f / 5.0}
-    tr_with_base_noise = TRIANGLE_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 10}
-    sq_with_base_noise = SQUARE_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 10}
-    cr_with_base_noise = CROSS_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 10}
+    tr_with_base_noise = TRIANGLE_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 5.0}
+    sq_with_base_noise = SQUARE_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 5.0}
+    cr_with_base_noise = CROSS_WITH_BASE_NOISE.flatten.collect { |input| input.to_f / 5.0}
     puts "Training the network, please wait."
-    200.times do
-      net.train(tr_input, [1,0,0])
-      net.train(sq_input, [0,1,0])
-      net.train(cr_input, [0,0,1])
+    101.times do |i|
+      error = net.train(tr_input, [1,0,0])
+      error = net.train(sq_input, [0,1,0])
+      error = net.train(cr_input, [0,0,1])
+      puts "Error after iteration #{i}:\t#{error}" if i%20 == 0
     end
     def result_label(result)
@@ -58,8 +61,7 @@ times = Benchmark.measure do
     puts "#{net.eval(tr_with_base_noise).inspect} => #{result_label(net.eval(tr_with_base_noise))}"
     puts "#{net.eval(sq_with_base_noise).inspect} => #{result_label(net.eval(sq_with_base_noise))}"
     puts "#{net.eval(cr_with_base_noise).inspect} => #{result_label(net.eval(cr_with_base_noise))}"
 end
-  puts "Elapsed time: #{times}"
+puts "Elapsed time: #{times}"

data/examples/neural_network/xor_example.rb CHANGED

@@ -1,25 +1,35 @@
-require File.dirname(__FILE__) + '/training_patterns'
-require File.dirname(__FILE__) + '/patterns_with_noise'
-require File.dirname(__FILE__) + '/patterns_with_base_noise'
-require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
+# Author::    Sergio Fierens
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
-examples = [
-    [[0, 0], [0, 1]],
-    [[0, 1], [1, 0]],
-    [[1, 0], [1, 0]],
-    [[1, 1], [0, 0]]
-]
+require File.dirname(__FILE__) + '/../../lib/ai4r/neural_network/backpropagation'
+require 'benchmark'
-net = Ai4r::NeuralNetwork::Backpropagation.new([2, 1, 2, 1])
+times = Benchmark.measure do
-i=0
-200.times {
-    examples.each do |ex|
-        2000.times {net.train(ex[0], [ex[1].first])}
+    srand 1
+    net = Ai4r::NeuralNetwork::Backpropagation.new([2, 2, 1])
+    puts "Training the network, please wait."
+    2001.times do |i|
+      net.train([0,0], [0])
+      net.train([0,1], [1])
+      net.train([1,0], [1])
+      error = net.train([1,1], [0])
+      puts "Error after iteration #{i}:\t#{error}" if i%200 == 0
     end
-    puts(i=i+1)
-}
+    puts "Test data"
+    puts "[0,0] = > #{net.eval([0,0]).inspect}"
+    puts "[0,1] = > #{net.eval([0,1]).inspect}"
+    puts "[1,0] = > #{net.eval([1,0]).inspect}"
+    puts "[1,1] = > #{net.eval([1,1]).inspect}"
+end
-examples.each do |ex|
-    print ex[0], ' => ', net.eval(ex[0]).inspect, ', should be ', ex[1].first, "\n"
-end
+  puts "Elapsed time: #{times}"

data/lib/ai4r/classifiers/classifier.rb CHANGED

@@ -1,19 +1,23 @@
 # Author::    Sergio Fierens
 # License::   MPL 1.1
 # Project::   ai4r
-# Url::       http://ai4r.rubyforge.org/
+# Url::       http://ai4r.rubyforge.org
 #
 # You can redistribute it and/or modify it under the terms of
 # the Mozilla Public License version 1.1  as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/parameterizable'
 module Ai4r
   module Classifiers
-    # The only purpose of this class is to define a common API for classifiers.
+    # This class defines a common API for classifiers.
     # All methods in this class must be implemented in subclasses.
     class Classifier
+      include Ai4r::Data::Parameterizable
       # Build a new classifier, using data examples found in data_set.
       def build(data_set)
         raise NotImplementedError
@@ -30,9 +34,16 @@ module Ai4r
       # e.g.
       #
       #   classifier.get_rules
-      #     # =>  marketing_target='Y'
+      #     # =>  if age_range=='<30' then marketing_target='Y'
+      #           elsif age_range=='[30-50)' and city=='Chicago' then marketing_target='Y'
+      #           elsif age_range=='[30-50)' and city=='New York' then marketing_target='N'
+      #           elsif age_range=='[50-80]' then marketing_target='N'
+      #           elsif age_range=='>80' then marketing_target='Y'
+      #           else raise 'There was not enough information during training to do a proper induction for this data element' end
       #
       # It is a nice way to inspect induction results, and also to execute them:
+      #     age_range = '<30'
+      #     city='New York'
       #     marketing_target = nil
       #     eval classifier.get_rules
       #     puts marketing_target

data/lib/ai4r/classifiers/id3.rb CHANGED

@@ -28,24 +28,26 @@ module Ai4r
     #
     #   DATA_LABELS = [ 'city', 'age_range', 'gender', 'marketing_target'  ]
     #
-    #   DATA_SET = [  ['New York',  '<30',      'M', 'Y'],
-    #            ['Chicago',     '<30',      'M', 'Y'],
-    #            ['Chicago',     '<30',      'F', 'Y'],
-    #            ['New York',  '<30',      'M', 'Y'],
-    #            ['New York',  '<30',      'M', 'Y'],
-    #            ['Chicago',     '[30-50)',  'M', 'Y'],
-    #            ['New York',  '[30-50)',  'F', 'N'],
-    #            ['Chicago',     '[30-50)',  'F', 'Y'],
-    #            ['New York',  '[30-50)',  'F', 'N'],
-    #            ['Chicago',     '[50-80]', 'M', 'N'],
-    #            ['New York',  '[50-80]', 'F', 'N'],
-    #            ['New York',  '[50-80]', 'M', 'N'],
-    #            ['Chicago',     '[50-80]', 'M', 'N'],
-    #            ['New York',  '[50-80]', 'F', 'N'],
-    #            ['Chicago',     '>80',      'F', 'Y']
-    #          ]
+    #   DATA_ITEMS = [
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['Chicago',     '<30',      'M', 'Y'],
+    #          ['Chicago',     '<30',      'F', 'Y'],
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['New York',  '<30',      'M', 'Y'],
+    #          ['Chicago',     '[30-50)',  'M', 'Y'],
+    #          ['New York',  '[30-50)',  'F', 'N'],
+    #          ['Chicago',     '[30-50)',  'F', 'Y'],
+    #          ['New York',  '[30-50)',  'F', 'N'],
+    #          ['Chicago',     '[50-80]', 'M', 'N'],
+    #          ['New York',  '[50-80]', 'F', 'N'],
+    #          ['New York',  '[50-80]', 'M', 'N'],
+    #          ['Chicago',     '[50-80]', 'M', 'N'],
+    #          ['New York',  '[50-80]', 'F', 'N'],
+    #          ['Chicago',     '>80',      'F', 'Y']
+    #        ]
     #
-    #   id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
+    #   data_set = DataSet.new(:data_items=>DATA_SET, :data_labels=>DATA_LABELS)
+    #   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
     #
     #   id3.get_rules
     #     # =>  if age_range=='<30' then marketing_target='Y'
@@ -64,22 +66,20 @@ module Ai4r
     # attributes. Consider moving your data to an external CSV (comma separate
     # values) file.
     #
-    #   data_set = []
-    #   CSV::Reader.parse(File.open("#{File.dirname(__FILE__)}/data_set.csv", 'r')) do |row|
-    #     data_set << row
-    #   end
-    #   data_labels = data_set.shift
-    #
-    #   id3 = Ai4r::Classifiers::ID3.new(data_set, data_labels)
+    #   data_file = "#{File.dirname(__FILE__)}/data_set.csv"
+    #   data_set = DataSet.load_data_and_labels_from_csv data_file
+    #   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
     #
     # = A nice tip for data evaluation
     #
-    #   id3 = Ai4r::Classifiers::ID3.new(DATA_SET, DATA_LABELS)
-    #     age_range = '<30'
-    #     marketing_target = nil
-    #     eval id3.get_rules
-    #     puts marketing_target
-    #       # =>  'Y'
+    #   id3 = Ai4r::Classifiers::ID3.new.build(data_set)
+    #
+    #   age_range = '<30'
+    #   marketing_target = nil
+    #   eval id3.get_rules
+    #   puts marketing_target
+    #     # =>  'Y'
+    #
     # = More about ID3 and decision trees
     #
     # * http://en.wikipedia.org/wiki/Decision_tree
@@ -92,7 +92,7 @@ module Ai4r
     class ID3 < Classifier
       attr_reader :data_set
       # Create a new ID3 classifier. You must provide a DataSet instance
       # as parameter.
       def build(data_set)

data/lib/ai4r/clusterers/clusterer.rb CHANGED

@@ -7,6 +7,8 @@
 # the Mozilla Public License version 1.1  as published by the
 # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+require File.dirname(__FILE__) + '/../data/parameterizable'
 module Ai4r
   module Clusterers
@@ -14,7 +16,9 @@ module Ai4r
     # All methods in this class (other than eval) must be implemented in
     # subclasses.
     class Clusterer
+      include Ai4r::Data::Parameterizable
       # Build a new clusterer, using data examples found in data_set.
       # Data items will be clustered in "number_of_clusters" different
       # clusters.
@@ -27,29 +31,6 @@ module Ai4r
         raise NotImplementedError
       end
-      # Get info on what can be parameterized on this clusterer.
-      # It returns a hash with the following format:
-      # { :param_name => "Info on the parameter" }
-      def get_parameters_info
-        raise NotImplementedError
-      end
-      # Set parameter values on this clusterer instance.
-      # You must provide a hash with the folowing format:
-      # { :param_name => parameter_value }
-      def set_parameters(parameters)
-        raise NotImplementedError
-      end
-      # Get parameter values on this clusterer instance.
-      # Returns a hash with the folowing format:
-      # { :param_name => parameter_value }
-      def get_parameters
-        raise NotImplementedError
-      end
     end
   end
 end

data/lib/ai4r/clusterers/k_means.rb CHANGED

@@ -23,8 +23,13 @@ module Ai4r
       attr_reader :data_set, :number_of_clusters
       attr_reader :clusters, :centroids, :iterations
-      attr_accessor :max_iterations
-      attr_accessor :distance_function
+      parameters_info :max_iterations => "Maximum number of iterations to " +
+        "build the clusterer. By default it is uncapped.",
+        :distance_function => "Custom implementation of distance function. " +
+          "It must be a closure receiving two data items and return the " +
+          "distance bewteen them. By default, this algorithm uses " +
+          "ecuclidean distance of numeric attributes to the power of 2."
       # Build a new clusterer, using data examples found in data_set.
       # Items will be clustered in "number_of_clusters" different
@@ -50,42 +55,6 @@ module Ai4r
             distance(data_item, centroid)})
       end
-      # Get info on what can be parameterized on this clusterer algorithm.
-      # It returns a hash with the following format:
-      # { :param_name => "Info on the parameter" }
-      def get_parameters_info
-        { :max_iterations => "Maximum number of iterations to build the " +
-          "clusterer. By default it is uncapped.",
-          :distance_function => "Custom implementation of distance function. " +
-            "It must be a closure receiving two data items and return the " +
-            "distance bewteen them. By default, this algorithm uses " +
-            "ecuclidean distance of numeric attributes to the power of 2."
-          }
-      end
-      # Set parameters on this clusterer instance.
-      # You must provide a hash with the folowing format:
-      # { :param_name => parameter_value }
-      #
-      # Use get_parameters_info to know what parameters are accepted.
-      def set_parameters(parameters)
-        if parameters.has_key?(:max_iterations)
-          @max_iterations = parameters[:max_iterations]
-        end
-        if parameters.has_key?(:distance_function)
-          @distance_function = parameters[:distance_function]
-        end
-        return self
-      end
-      # Get parameter values on this clusterer instance.
-      # Returns a hash with the folowing format:
-      # { :param_name => parameter_value }
-      def get_parameters
-        { :max_iterations => @max_iterations,
-          :distance_function => @distance_function }
-      end
       # This function calculates the distance between 2 different
       # instances. By default, it returns the euclidean distance to the
       # power of 2.

data/lib/ai4r/data/data_set.rb CHANGED

@@ -30,9 +30,11 @@ module Ai4r
         set_data_items(options[:data_items]) if options[:data_items]
       end
-      # Retrieve data item(s) by index. You can specify a an index range, too.
+      # Retrieve a new DataSet, with the item(s) selected by the provided
+      # index. You can specify an index range, too.
       def [](index)
-        return @data_items[index]
+        return DataSet.new(:data_items=>@data_items[index],
+          :data_labels =>@data_labels)
       end
       # Load data items from csv file

data/lib/ai4r/data/parameterizable.rb ADDED

@@ -0,0 +1,64 @@
+# Author::    Sergio Fierens
+# License::   MPL 1.1
+# Project::   ai4r
+# Url::       http://ai4r.rubyforge.org/
+#
+# You can redistribute it and/or modify it under the terms of
+# the Mozilla Public License version 1.1  as published by the
+# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
+module Ai4r
+  module Data
+    module Parameterizable
+      module ClassMethods
+        # Get info on what can be parameterized on this algorithm.
+        # It returns a hash with the following format:
+        # { :param_name => "Info on the parameter" }
+        def get_parameters_info
+          return @_params_info_ || {}
+        end
+        # Set info on what can be parameterized on this algorithm.
+        # You must provide a hash with the following format:
+        # { :param_name => "Info on the parameter" }
+        def parameters_info(params_info)
+          @_params_info_ = params_info
+          params_info.keys.each do |param|
+            attr_accessor param
+          end
+        end
+      end
+      # Set parameter values on this algorithm instance.
+      # You must provide a hash with the folowing format:
+      # { :param_name => parameter_value }
+      def set_parameters(params)
+        self.class.get_parameters_info.keys.each do | key |
+          if self.respond_to?("#{key}=".to_sym)
+            send("#{key}=".to_sym, params[key]) if params.has_key? key
+          end
+        end
+        return self
+      end
+      # Get parameter values on this algorithm instance.
+      # Returns a hash with the folowing format:
+      # { :param_name => parameter_value }
+      def get_parameters
+        params = {}
+        self.class.get_parameters_info.keys.each do | key |
+          params[key] = send(key) if self.respond_to?(key)
+        end
+        return params
+      end
+      def self.included(base)
+        base.extend(ClassMethods)
+      end
+    end
+  end
+end