ai4r 1.4 → 1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. data/README.rdoc +24 -3
  2. data/examples/decision_trees/id3_example.rb +1 -1
  3. data/examples/genetic_algorithm/genetic_algorithm_example.rb +1 -1
  4. data/lib/ai4r.rb +11 -0
  5. data/lib/ai4r/classifiers/classifier.rb +2 -0
  6. data/lib/ai4r/classifiers/id3.rb +3 -2
  7. data/lib/ai4r/classifiers/multilayer_perceptron.rb +135 -0
  8. data/lib/ai4r/classifiers/one_r.rb +2 -1
  9. data/lib/ai4r/classifiers/prism.rb +2 -1
  10. data/lib/ai4r/classifiers/zero_r.rb +2 -1
  11. data/lib/ai4r/clusterers/average_linkage.rb +60 -0
  12. data/lib/ai4r/clusterers/bisecting_k_means.rb +17 -39
  13. data/lib/ai4r/clusterers/clusterer.rb +25 -0
  14. data/lib/ai4r/clusterers/complete_linkage.rb +62 -0
  15. data/lib/ai4r/clusterers/k_means.rb +18 -25
  16. data/lib/ai4r/clusterers/single_linkage.rb +179 -0
  17. data/lib/ai4r/data/data_set.rb +33 -41
  18. data/lib/ai4r/data/proximity.rb +82 -0
  19. data/lib/ai4r/data/statistics.rb +77 -0
  20. data/lib/ai4r/experiment/classifier_evaluator.rb +95 -0
  21. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +2 -4
  22. data/site/build/site/en/build/tmp/build-info.xml +5 -0
  23. data/site/build/site/en/build/tmp/plugins-1.xml +212 -0
  24. data/site/build/site/en/build/tmp/plugins-2.xml +252 -0
  25. data/site/build/site/en/build/tmp/projfilters.properties +41 -0
  26. data/site/build/site/en/downloads.html +1 -1
  27. data/site/build/site/en/geneticAlgorithms.html +1 -1
  28. data/site/build/site/en/index.html +44 -7
  29. data/site/build/site/en/index.pdf +278 -155
  30. data/site/build/site/en/linkmap.html +2 -2
  31. data/site/build/site/en/linkmap.pdf +12 -12
  32. data/site/build/site/en/machineLearning.html +1 -1
  33. data/site/build/site/en/neuralNetworks.html +1 -1
  34. data/site/build/site/en/sourceCode.html +244 -0
  35. data/site/build/site/en/sourceCode.pdf +278 -0
  36. data/site/build/site/en/svn.html +34 -42
  37. data/site/build/site/en/svn.pdf +86 -114
  38. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.data +0 -0
  39. data/site/build/tmp/cocoon-work/cache-dir/cocoon-ehcache-1.index +0 -0
  40. data/site/build/tmp/projfilters.properties +1 -1
  41. data/site/build/webapp/WEB-INF/logs/core.log +628 -629
  42. data/site/build/webapp/WEB-INF/logs/error.log +213 -213
  43. data/site/src/documentation/content/xdocs/index.xml +20 -1
  44. data/site/src/documentation/content/xdocs/site.xml +1 -1
  45. data/site/src/documentation/content/xdocs/sourceCode.xml +43 -0
  46. data/site/src/documentation/resources/images/sigmoid.png +0 -0
  47. data/test/classifiers/id3_test.rb +0 -1
  48. data/test/classifiers/multilayer_perceptron_test.rb +79 -0
  49. data/test/classifiers/one_r_test.rb +0 -2
  50. data/test/classifiers/prism_test.rb +0 -2
  51. data/test/classifiers/zero_r_test.rb +0 -2
  52. data/test/clusterers/average_linkage_test.rb +45 -0
  53. data/test/clusterers/bisecting_k_means_test.rb +0 -2
  54. data/test/clusterers/complete_linkage_test.rb +45 -0
  55. data/test/clusterers/k_means_test.rb +0 -2
  56. data/test/clusterers/single_linkage_test.rb +113 -0
  57. data/test/data/data_set_test.rb +3 -15
  58. data/test/data/proximity_test.rb +71 -0
  59. data/test/data/statistics_test.rb +65 -0
  60. data/test/experiment/classifier_evaluator_test.rb +76 -0
  61. metadata +27 -6
  62. data/site/src/documentation/content/xdocs/svn.xml +0 -41
@@ -20,10 +20,31 @@ http://ai4r.rubyforge.org
20
20
 
21
21
  = More Info
22
22
 
23
- * AI4R wiki: http://wiki.jadeferret.com/Category:AI4R
24
- * AI4R Project site: http://ai4r.rubyforge.org
23
+ * AI4R wiki: http://wiki.jadeferret.com/Category:AI4R
24
+ * AI4R Project site: http://ai4r.rubyforge.org
25
25
 
26
- = Warranty
26
+ = Contact
27
+
28
+ If you have questions or constructive comments about this project,
29
+ please post them in the forum (http://forum.jadeferret.com/viewforum.php?f=3).
30
+ I get an email notification when you post, and I do my best to answer as soon as possible.
31
+
32
+ If you do not want to make it public, send it to me: Sergio Fierens, email address: (sergio (at) jadeferret (dot) com). But please, try to post them in the forum. I get tons of emails and it would be great to make them public to help everyone.
33
+
34
+ = Roadmap
35
+
36
+ AI4R is an active project. If you are interested in what we are working on,
37
+ check out the development roadmap: http://wiki.jadeferret.com/AI4R_RoadMap
38
+
39
+ = Disclaimer
40
+
41
+ In plain English:
42
+
43
+ This project was created by Sergio Fierens, but the AI algorithms were created by other
44
+ people who are actually much more clever than Sergio. He does his best implementing
45
+ them, but he cannot guarantee that these implementations are accurate.
46
+
47
+ In legalese:
27
48
 
28
49
  This software is provided "as is" and without any express or implied warranties,
29
50
  including, without limitation, the implied warranties of merchantability and
@@ -11,7 +11,7 @@ require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
11
11
 
12
12
  # Load data from data_set.csv
13
13
  data_filename = "#{File.dirname(__FILE__)}/data_set.csv"
14
- data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
14
+ data_set = Ai4r::Data::DataSet.new.load_csv_with_labels data_filename
15
15
 
16
16
  # Build ID3 tree
17
17
  id3 = Ai4r::Classifiers::ID3.new.build(data_set)
@@ -13,7 +13,7 @@ require 'csv'
13
13
 
14
14
  # Load data from data_set.csv
15
15
  data_filename = "#{File.dirname(__FILE__)}/travel_cost.csv"
16
- data_set = Ai4r::Data::DataSet.new.load_data_and_labels_from_csv data_filename
16
+ data_set = Ai4r::Data::DataSet.new.load_csv_with_labels data_filename
17
17
  data_set.data_items.collect! {|column| column.collect {|element| element.to_f}}
18
18
 
19
19
  Ai4r::GeneticAlgorithm::Chromosome.set_cost_matrix(data_set)
@@ -1,10 +1,21 @@
1
+ # Data
2
+ require "ai4r/data/data_set"
3
+ require "ai4r/data/statistics"
4
+ require "ai4r/data/parameterizable"
5
+ # Clusterers
1
6
  require "ai4r/clusterers/clusterer"
2
7
  require "ai4r/clusterers/k_means"
3
8
  require "ai4r/clusterers/bisecting_k_means"
9
+ require "ai4r/clusterers/single_linkage"
10
+ require "ai4r/clusterers/complete_linkage"
11
+ require "ai4r/clusterers/average_linkage"
12
+ # Classifiers
4
13
  require "ai4r/classifiers/classifier"
5
14
  require "ai4r/classifiers/id3"
6
15
  require "ai4r/classifiers/prism"
7
16
  require "ai4r/classifiers/one_r"
8
17
  require "ai4r/classifiers/zero_r"
18
+ # Neural networks
9
19
  require "ai4r/neural_network/backpropagation"
20
+ # Genetic Algorithms
10
21
  require "ai4r/genetic_algorithm/genetic_algorithm"
@@ -19,6 +19,8 @@ module Ai4r
19
19
  include Ai4r::Data::Parameterizable
20
20
 
21
21
  # Build a new classifier, using data examples found in data_set.
22
+ # The last attribute of each item is considered as the
23
+ # item class.
22
24
  def build(data_set)
23
25
  raise NotImplementedError
24
26
  end
@@ -67,7 +67,7 @@ module Ai4r
67
67
  # values) file.
68
68
  #
69
69
  # data_file = "#{File.dirname(__FILE__)}/data_set.csv"
70
- # data_set = DataSet.load_data_and_labels_from_csv data_file
70
+ # data_set = DataSet.load_csv_with_labels data_file
71
71
  # id3 = Ai4r::Classifiers::ID3.new.build(data_set)
72
72
  #
73
73
  # = A nice tip for data evaluation
@@ -94,7 +94,8 @@ module Ai4r
94
94
  attr_reader :data_set
95
95
 
96
96
  # Create a new ID3 classifier. You must provide a DataSet instance
97
- # as parameter.
97
+ # as parameter. The last attribute of each item is considered as the
98
+ # item class.
98
99
  def build(data_set)
99
100
  data_set.check_not_empty
100
101
  @data_set = data_set
@@ -0,0 +1,135 @@
1
+ # Author:: Sergio Fierens (Implementation only)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set.rb'
11
+ require File.dirname(__FILE__) + '/../classifiers/classifier'
12
+ require File.dirname(__FILE__) + '/../neural_network/backpropagation'
13
+
14
+ module Ai4r
15
+ module Classifiers
16
+
17
+ # = Introduction
18
+ #
19
+ # The idea behind the MultilayerPerceptron classifier is to
20
+ # train a Multilayer Perceptron neural network with the provided examples,
21
+ # and predict the class for new data items.
22
+ #
23
+ # = Parameters
24
+ #
25
+ # Use class method get_parameters_info to obtain details on the algorithm
26
+ # parameters. Use set_parameters to set values for these parameters.
27
+ # See Parameterizable module documentation.
28
+ #
29
+ # * :network_class => Neural network implementation class.
30
+ # By default: Ai4r::NeuralNetwork::Backpropagation.
31
+ # * :network_parameters => Parameters to be forwarded to the back end
32
+ # neural network.
33
+ # * :hidden_layers => Hidden layer structure. E.g. [8, 6] will generate
34
+ # 2 hidden layers with 8 and 6 neurons each. By default []
35
+ # * :training_iterations => How many times the training should be repeated.
36
+ # By default: 500.
37
+ # * :active_node_value => Default: 1
38
+ # * :inactive_node_value => Default: 0
39
+ class MultilayerPerceptron < Classifier
40
+
41
+ attr_reader :data_set, :class_value, :network, :domains
42
+
43
+ parameters_info :network_class => "Neural network implementation class."+
44
+ "By default: Ai4r::NeuralNetwork::Backpropagation.",
45
+ :network_parameters => "parameters to be forwarded to the back end " +
46
+ "neural network.",
47
+ :hidden_layers => "Hidden layer structure. E.g. [8, 6] will generate " +
48
+ "2 hidden layers with 8 and 6 neurons each. By default []",
49
+ :training_iterations => "How many times the training should be " +
50
+ "repeated. By default: 1000",
51
+ :active_node_value => "Default: 1",
52
+ :inactive_node_value => "Default: 0"
53
+
54
+ def initialize
55
+ @network_class = Ai4r::NeuralNetwork::Backpropagation
56
+ @hidden_layers = []
57
+ @training_iterations = 500
58
+ @network_parameters = {}
59
+ @active_node_value = 1
60
+ @inactive_node_value = 0
61
+ end
62
+
63
+ # Build a new MultilayerPerceptron classifier. You must provide a DataSet
64
+ # instance as parameter. The last attribute of each item is considered as
65
+ # the item class.
66
+ def build(data_set)
67
+ data_set.check_not_empty
68
+ @data_set = data_set
69
+ @domains = @data_set.build_domains.collect {|domain| domain.to_a}
70
+ @outputs = @domains.last.length
71
+ @inputs = 0
72
+ @domains[0...-1].each {|domain| @inputs += domain.length}
73
+ @structure = [@inputs] + @hidden_layers + [@outputs]
74
+ @network = @network_class.new @structure
75
+ @training_iterations.times do
76
+ data_set.data_items.each do |data_item|
77
+ input_values = data_to_input(data_item[0...-1])
78
+ output_values = data_to_output(data_item.last)
79
+ @network.train(input_values, output_values)
80
+ end
81
+ end
82
+ return self
83
+ end
84
+
85
+ # You can evaluate new data, predicting its class.
86
+ # e.g.
87
+ # classifier.eval(['New York', '<30', 'F']) # => 'Y'
88
+ def eval(data)
89
+ input_values = data_to_input(data)
90
+ output_values = @network.eval(input_values)
91
+ return @domains.last[get_max_index(output_values)]
92
+ end
93
+
94
+ # Multilayer Perceptron Classifiers cannot generate
95
+ # human-readable rules.
96
+ def get_rules
97
+ return "raise 'Neural networks classifiers do not generate human-readable rules.'"
98
+ end
99
+
100
+ protected
101
+
102
+ def data_to_input(data_item)
103
+ input_values = Array.new(@inputs, @inactive_node_value)
104
+ accum_index = 0
105
+ data_item.each_index do |att_index|
106
+ att_value = data_item[att_index]
107
+ domain_index = @domains[att_index].index(att_value)
108
+ input_values[domain_index + accum_index] = @active_node_value
109
+ accum_index = @domains[att_index].length
110
+ end
111
+ return input_values
112
+ end
113
+
114
+ def data_to_output(data_item)
115
+ output_values = Array.new(@outputs, @inactive_node_value)
116
+ output_values[@domains.last.index(data_item)] = @active_node_value
117
+ return output_values
118
+ end
119
+
120
+ def get_max_index(output_values)
121
+ max_value = @inactive_node_value
122
+ max_index = 0
123
+ output_values.each_index do |output_index|
124
+ if max_value < output_values[output_index]
125
+ max_value = output_values[output_index]
126
+ max_index = output_index
127
+ end
128
+ end
129
+ return max_index
130
+ end
131
+
132
+ end
133
+
134
+ end
135
+ end
@@ -25,7 +25,8 @@ module Ai4r
25
25
  attr_reader :data_set, :rule
26
26
 
27
27
  # Build a new OneR classifier. You must provide a DataSet instance
28
- # as parameter.
28
+ # as parameter. The last attribute of each item is considered as
29
+ # the item class.
29
30
  def build(data_set)
30
31
  data_set.check_not_empty
31
32
  @data_set = data_set
@@ -29,7 +29,8 @@ module Ai4r
29
29
  attr_reader :data_set, :rules
30
30
 
31
31
  # Build a new Prism classifier. You must provide a DataSet instance
32
- # as parameter.
32
+ # as parameter. The last attribute of each item is considered as
33
+ # the item class.
33
34
  def build(data_set)
34
35
  data_set.check_not_empty
35
36
  @data_set = data_set
@@ -25,7 +25,8 @@ module Ai4r
25
25
  attr_reader :data_set, :class_value
26
26
 
27
27
  # Build a new ZeroR classifier. You must provide a DataSet instance
28
- # as parameter.
28
+ # as parameter. The last attribute of each item is considered as
29
+ # the item class.
29
30
  def build(data_set)
30
31
  data_set.check_not_empty
31
32
  @data_set = data_set
@@ -0,0 +1,60 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/single_linkage'
12
+
13
+ module Ai4r
14
+ module Clusterers
15
+
16
+ # Implementation of a Hierarchical clusterer with average linkage.
17
+ # Hierarchical clusterers create one cluster per element, and then
18
+ # progressively merge clusters, until the required number of clusters
19
+ # is reached.
20
+ # With average linkage, the distance between two clusters is computed as
21
+ # the average distance between elements of each cluster.
22
+ class AverageLinkage < SingleLinkage
23
+
24
+ # Build a new clusterer, using data examples found in data_set.
25
+ # Items will be clustered in "number_of_clusters" different
26
+ # clusters.
27
+ def build(data_set, number_of_clusters)
28
+ super
29
+ end
30
+
31
+ # Classifies the given data item, returning the cluster index it belongs
32
+ # to (0-based).
33
+ def eval(data_item)
34
+ super
35
+ end
36
+
37
+ protected
38
+
39
+ # Calculate cluster distance using the average linkage method
40
+ def calc_index_clusters_distance(cluster_a, cluster_b)
41
+ dist_sum = 0.0
42
+ cluster_a.each do |index_a|
43
+ cluster_b.each do |index_b|
44
+ dist_sum += read_distance_matrix(index_a, index_b)
45
+ end
46
+ end
47
+ return dist_sum/(cluster_a.length*cluster_b.length)
48
+ end
49
+
50
+ def distance_between_item_and_cluster(data_item, cluster)
51
+ dist_sum = 0.0
52
+ cluster.data_items.each do |another_item|
53
+ dist_sum += distance(data_item, another_item)
54
+ end
55
+ return dist_sum/cluster.data_items.length
56
+ end
57
+
58
+ end
59
+ end
60
+ end
@@ -7,7 +7,6 @@
7
7
  # the Mozilla Public License version 1.1 as published by the
8
8
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
9
 
10
- require "set"
11
10
  require File.dirname(__FILE__) + '/../data/data_set'
12
11
  require File.dirname(__FILE__) + '/../clusterers/k_means'
13
12
 
@@ -25,6 +24,23 @@ module Ai4r
25
24
  attr_reader :data_set, :number_of_clusters, :clusters, :centroids
26
25
  attr_accessor :max_iterations, :distance_function, :refine
27
26
 
27
+ parameters_info :max_iterations => "Maximum number of iterations to " +
28
+ "build the clusterer. By default it is uncapped.",
29
+ :distance_function => "Custom implementation of distance function. " +
30
+ "It must be a closure receiving two data items and return the " +
31
+ "distance bewteen them. By default, this algorithm uses " +
32
+ "ecuclidean distance of numeric attributes to the power of 2.",
33
+ :centroid_function => "Custom implementation to calculate the " +
34
+ "centroid of a cluster. It must be a closure receiving an array of " +
35
+ "data sets, and return an array of data items, representing the " +
36
+ "centroids of for each data set. " +
37
+ "By default, this algorithm returns a data items using the mode "+
38
+ "or mean of each attribute on each data set.",
39
+ :refine => "Boolean value. True by default. It will run the " +
40
+ "classic K Means algorithm, using as initial centroids the " +
41
+ "result of the bisecting approach."
42
+
43
+
28
44
  def intialize
29
45
  @refine = true
30
46
  end
@@ -54,44 +70,6 @@ module Ai4r
54
70
  return self
55
71
  end
56
72
 
57
- # Get info on what can be parameterized on this clusterer algorithm.
58
- # It returns a hash with the following format:
59
- # { :param_name => "Info on the parameter" }
60
- def get_parameters_info
61
- { :max_iterations => "Maximum number of iterations used to bisect a " +
62
- "cluster. By default it is uncapped.",
63
- :distance_function => "Custom implementation of distance function. " +
64
- "It must be a closure receiving two data items and return the " +
65
- "distance bewteen them. By default, this algorithm uses " +
66
- "ecuclidean distance of numeric attributes to the power of 2.",
67
- :refine => "Boolean value. True by default. It will run the " +
68
- "classic K Means algorithm, using as initial centroids the " +
69
- "result of the bisecting approach."
70
- }
71
- end
72
-
73
- # Set parameters on this clusterer instance.
74
- # You must provide a hash with the folowing format:
75
- # { :param_name => parameter_value }
76
- #
77
- # Use get_parameters_info to know what parameters are accepted.
78
- def set_parameters(parameters)
79
- super
80
- if parameters.has_key?(:refine)
81
- @refine = parameters[:refine]
82
- end
83
- return self
84
- end
85
-
86
- # Get parameter values on this clusterer instance.
87
- # Returns a hash with the folowing format:
88
- # { :param_name => parameter_value }
89
- def get_parameters
90
- params = super
91
- params[:refine] = @refine
92
- return params
93
- end
94
-
95
73
  protected
96
74
  def calc_initial_centroids
97
75
  @centroids # Use existing centroids
@@ -31,6 +31,31 @@ module Ai4r
31
31
  raise NotImplementedError
32
32
  end
33
33
 
34
+ protected
35
+ # Useful as a default distance function for clustering algorithms
36
+ def euclidean_distance(a, b)
37
+ dist = 0.0
38
+ a.each_index do |index|
39
+ if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
40
+ dist = dist + ((a[index]-b[index])*(a[index]-b[index]))
41
+ end
42
+ end
43
+ return dist
44
+ end
45
+
46
+ def get_min_index(array)
47
+ min = array.first
48
+ index = 0
49
+ array.each_index do |i|
50
+ x = array[i]
51
+ if x < min
52
+ min = x
53
+ index = i
54
+ end
55
+ end
56
+ return index
57
+ end
58
+
34
59
  end
35
60
  end
36
61
  end
@@ -0,0 +1,62 @@
1
+ # Author:: Sergio Fierens (implementation)
2
+ # License:: MPL 1.1
3
+ # Project:: ai4r
4
+ # Url:: http://ai4r.rubyforge.org/
5
+ #
6
+ # You can redistribute it and/or modify it under the terms of
7
+ # the Mozilla Public License version 1.1 as published by the
8
+ # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
+
10
+ require File.dirname(__FILE__) + '/../data/data_set'
11
+ require File.dirname(__FILE__) + '/../clusterers/single_linkage'
12
+
13
+ module Ai4r
14
+ module Clusterers
15
+
16
+ # Implementation of a Hierarchical clusterer with complete linkage.
17
+ # Hierarchical clusterers create one cluster per element, and then
18
+ # progressively merge clusters, until the required number of clusters
19
+ # is reached.
20
+ # With complete linkage, the distance between two clusters is computed as
21
+ # the maximum distance between elements of each cluster.
22
+ class CompleteLinkage < SingleLinkage
23
+
24
+ # Build a new clusterer, using data examples found in data_set.
25
+ # Items will be clustered in "number_of_clusters" different
26
+ # clusters.
27
+ def build(data_set, number_of_clusters)
28
+ super
29
+ end
30
+
31
+ # Classifies the given data item, returning the cluster index it belongs
32
+ # to (0-based).
33
+ def eval(data_item)
34
+ super
35
+ end
36
+
37
+ protected
38
+
39
+ # Calculate cluster distance using the complete linkage method
40
+ def calc_index_clusters_distance(cluster_a, cluster_b)
41
+ max_dist = 0
42
+ cluster_a.each do |index_a|
43
+ cluster_b.each do |index_b|
44
+ dist = read_distance_matrix(index_a, index_b)
45
+ max_dist = dist if dist > max_dist
46
+ end
47
+ end
48
+ return max_dist
49
+ end
50
+
51
+ def distance_between_item_and_cluster(data_item, cluster)
52
+ max_dist = 0
53
+ cluster.data_items.each do |another_item|
54
+ dist = distance(data_item, another_item)
55
+ max_dist = dist if dist > max_dist
56
+ end
57
+ return max_dist
58
+ end
59
+
60
+ end
61
+ end
62
+ end